Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/77.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
在R中根据其他变量在不同行中取值的组合,计算变量在不同行中取值的差异_R_Data.table_Panel Data - Fatal编程技术网

在R中根据其他变量在不同行中取值的组合,计算变量在不同行中取值的差异

在R中根据其他变量在不同行中取值的组合,计算变量在不同行中取值的差异,r,data.table,panel-data,R,Data.table,Panel Data,我在 data.table 中有由 id 和 period 标识的不平衡面板数据。共有8278个观测值和230个变量。我想知道我的数据中的公司(由 id 标识)从计划进入市场(plan_entry == "yes",包含 NA)到实际进入市场(enter_market == "yes",不包含 NA)需要多长时间。因此,如果一家公司在 period == 4 计划进入市场,并在 period == 9 最终进入市场,我想生成例如 time_to_entry == 5。数据的结构大致如下,并且已经包含了所需的输出变量。请注意,公司可能会进入一个市场,而无需说明之前的任何计划。也可

我在 data.table 中有由 `id` 和 `period` 标识的不平衡面板数据。共有8278个观测值和230个变量。

我想知道我的数据中的公司(由 `id` 标识)从计划进入市场(`plan_entry == "yes"`,包含 `NA`)到实际进入市场(`enter_market == "yes"`,不包含 `NA`)需要多长时间。

因此,如果一家公司在 `period == 4` 计划进入市场,并在 `period == 9` 最终进入市场,我想生成例如 `time_to_entry == 5`。数据的结构大致如下,并且已经包含了所需的输出变量。请注意,公司可能会进入一个市场,而无需说明之前的任何计划。也可能是他们计划进入一个新市场,并在同一时期进入该市场。在这两种情况下,我都希望 `time_to_entry == 0`。如果一家公司从未进入任何市场,则该值也应为0。

示例性数据和预期结果

library(data.table)

# Expected result for the example panel: one row per (id, period) observation.
# time_to_entry holds the number of periods between the plan to enter and the
# actual entry, and 0 for every row where that does not apply.
desired_output <- data.table(
  id = factor(rep(
    c("C001", "C002", "C003", "C004", "C005"),
    times = c(3, 1, 5, 2, 7)
  )),
  period = factor(c(1, 2, 3, 2, 1, 4, 5, 6, 10, 3, 4, 2, 3, 4, 7, 8, 9, 10)),
  # plan_entry contains missing values on purpose
  plan_entry = factor(rep(
    c(NA, "yes", "no", NA, "no", "yes", NA, "yes", "no"),
    times = c(2, 1, 1, 1, 2, 4, 2, 4, 1)
  )),
  enter_market = factor(rep(
    c("no", "yes", "no", "yes", "no", "yes"),
    times = c(3, 1, 5, 2, 5, 2)
  )),
  time_to_entry = rep(c(0, 1, 0, 5, 1), times = c(10, 1, 5, 1, 1))
)

desired_output
#      id period plan_entry enter_market time_to_entry
# 1: C001      1       <NA>           no             0
# 2: C001      2       <NA>           no             0
# 3: C001      3        yes           no             0
# 4: C002      2         no          yes             0
# 5: C003      1       <NA>           no             0
# 6: C003      4         no           no             0
# 7: C003      5         no           no             0
# 8: C003      6        yes           no             0
# 9: C003     10        yes           no             0
#10: C004      3        yes          yes             0     ! there might be cases 
# where companies enter a market without stating any plans to do so in previous periods
#11: C004      4        yes          yes             1 
#12: C005      2       <NA>           no             0
#13: C005      3       <NA>           no             0
#14: C005      4        yes           no             0
#15: C005      7        yes           no             0
#16: C005      8        yes           no             0
#17: C005      9        yes          yes             5
#18: C005     10         no          yes             1       

  • dt$plan_entry == "yes" 的所有期间

  • dt$plan_entry_period

这里有一个解决方案,可以重现您的所需结果

    # Example panel data; period is numeric so that differences can be taken.
    dt <- data.table(
      id = factor(rep(
        c("C001", "C002", "C003", "C004", "C005"),
        times = c(3, 1, 5, 2, 7)
      )),
      period = c(1, 2, 3, 2, 1, 4, 5, 6, 10, 3, 4, 2, 3, 4, 7, 8, 9, 10),
      plan_entry = factor(rep(
        c(NA, "yes", "no", NA, "no", "yes", NA, "yes", "no"),
        times = c(2, 1, 1, 1, 2, 4, 2, 4, 1)
      )),
      enter_market = factor(rep(
        c("no", "yes", "no", "yes", "no", "yes"),
        times = c(3, 1, 5, 2, 5, 2)
      ))
    )

    # Periods elapsed since the earliest period of each (id, plan_entry) group.
    dt[, time_to_entry_with_plan := period - min(period),
       by = c("id", "plan_entry")]

    # Periods elapsed since the earliest period of each (id, enter_market) group.
    dt[, time_to_entry_without_plan := period - min(period),
       by = c("id", "enter_market")]

    # Entry rows take the elapsed time that matches their plan status; every
    # other row (including rows with a missing plan_entry) falls back to 0.
    dt[, time_to_entry := fcase(
      enter_market == "yes" & plan_entry == "yes", time_to_entry_with_plan,
      enter_market == "yes" & plan_entry == "no", time_to_entry_without_plan,
      default = 0
    )]
    

    dt

这里有一个使用非等联接的选项:

    # Compute, for every market-entry row, the time elapsed since the matching
    # plan to enter. Adds prev_entry, plan_period and time_to_entry to DT by
    # reference.
    # NOTE(review): mult="last"/"first" select by row order, so this presumably
    # expects DT to be sorted by period within id -- confirm.
    #
    # Step 1: for each row with enter_market == "yes", find the latest earlier
    # entry period of the same id (self non-equi join: matched period < current
    # period). x.period returns the period of the matched row; rows without an
    # earlier entry get 0L.
    DT[enter_market=="yes", prev_entry := 
        fcoalesce(.SD[.SD, on=.(id, period<period), mult="last", x.period], 0L)
    ]
    
    # Step 2: non-equi join against the plan_entry == "yes" rows to find the
    # first plan period in [prev_entry, period), i.e. at or after the previous
    # entry and strictly before the current entry. NA when no such plan exists.
    DT[enter_market=="yes", plan_period :=
        DT[plan_entry=="yes"][.SD, on=.(id, period>=prev_entry, period<period), mult="first", x.period]
    ]
    
    # Step 3: time_to_entry is the distance from the matched plan period to the
    # current period; rows with no matched plan_period (NA) are set to 0.
    # NOTE(review): the 0L fallbacks presumably require period to be an integer
    # column -- confirm.
    DT[, time_to_entry := fcoalesce(period - plan_period, 0L)]
    
    # Print the result
    DT
    
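    #在当前行之前查找上一个最近的 enter_market
    DT[enter_market=="yes", prev_entry := 
        fcoalesce(.SD[.SD, on=.(id, period<period), mult="last", x.period], 0L)
    ]
    
    #非等联接:查找在当前 enter_market 之前、且不早于上一个最近 enter_market 的第一个 plan_entry
    DT[enter_market=="yes", plan_period :=
        DT[plan_entry=="yes"][.SD, on=.(id, period>=prev_entry, period<period), mult="first", x.period]
    ]
    
    #计算 time_to_entry,并将 NA 设为 0
    DT[, time_to_entry := fcoalesce(period - plan_period, 0L)]
    
    非常感谢这种方法,Peace!我喜欢它,因为它看起来比较简单。但是,当我运行它时,它并没有完全重现 desired_output:在 time_to_entry 的最后一个单元格中,我得到的是 0 而不是 1。在整个数据集上运行时,问题似乎在于它总是计算当前期间(enter_market == "yes")与第一个 plan_entry == "yes" 的期间之间的差,而不是与下一个 plan_entry == "yes" 的期间之间的差。
    
    您确定吗?我在 time_to_entry 的最后一个单元格中得到的是 1 而不是 0。我已编辑答案以显示结果(请参见最后一列 time_to_entry)。
    
    太好了,chinsoon12!非常感谢,这似乎工作得很好。您能解释一下“.”的含义吗,例如 on=.(id, period<period) 中的“.”?
    
    表达式 .() 是 list() 的简写别名(参见 ?data.table)。x. 前缀用于在 x[i, on=keys] 联接中引用右表 x 的列,其中 i 是左表。通读 data.table 的所有 vignette 是一个很好的开始。
    
    太好了,非常感谢!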
    
    # Attempt from the question: spread each company's entry_period over all of
    # its rows, then measure the distance from each planning period to it.
    # NOTE(review): entry_period and plan_entry_period are created by code that
    # is not visible here -- confirm how they are derived.
    # fill in first entry_period for each observation by company
    library(zoo) # for na.locf()
    
    dt <- as.data.table(dt)
    # Carry the last non-NA entry_period forward within each id; na.rm = FALSE
    # keeps leading NAs so the column length is unchanged.
    dt[, entry_period := na.locf(entry_period, na.rm = FALSE, fromLast = FALSE), by = id]
    # Then fill the remaining leading NAs backward from the next non-NA value.
    dt[, entry_period := na.locf(entry_period, na.rm = FALSE, fromLast = TRUE), by = id]
    
    # Periods between plan and entry for rows where plan_entry is "yes"; this
    # expression yields NA for every other row and where plan_entry is NA.
    dt$time_to_entry <-
      ifelse(
        dt$plan_entry == "yes", 
        dt$entry_period - dt$plan_entry_period,
        NA)
    
    # check variable
    summary(dt$time_to_entry)
    
    # Print the result
    dt
    #      id period plan_entry enter_market min_period min_entry_period entry_period plan_entry_period time_to_entry
    # 1: C001      1       <NA>           no          1                1           NA                NA            NA
    # 2: C001      2       <NA>           no          1                1           NA                NA            NA
    # 3: C001      3        yes           no          1                1           NA                 3            NA
    # 4: C002      2         no          yes          2                2           NA                NA             0
    # 5: C003      1       <NA>           no          1                1           NA                NA            NA
    # 6: C003      4         no           no          1                1           NA                NA             0
    # 7: C003      5         no           no          1                1           NA                NA             0
    # 8: C003      6        yes           no          1                1           NA                 6            NA
    # 9: C003     10        yes           no          1                1           NA                10            NA
    #10: C004      3        yes          yes          3                3           NA                 3            NA
    #11: C004      4        yes          yes          3                3           NA                 4            NA
    #12: C005      2       <NA>           no          2                2            9                NA            NA
    #13: C005      3       <NA>           no          2                2            9                NA            NA
    #14: C005      4        yes           no          2                2            9                 4             5
    #15: C005      7        yes           no          2                2            9                 7             2
    #16: C005      8        yes           no          2                2            9                 8             1
    #17: C005      9        yes          yes          2                9            9                 9             0
    #18: C005     10         no          yes          2                9            9                NA             0
    
    
    # Example panel data; period is numeric so that differences can be taken.
    dt <- data.table(
      id = factor(rep(
        c("C001", "C002", "C003", "C004", "C005"),
        times = c(3, 1, 5, 2, 7)
      )),
      period = c(1, 2, 3, 2, 1, 4, 5, 6, 10, 3, 4, 2, 3, 4, 7, 8, 9, 10),
      plan_entry = factor(rep(
        c(NA, "yes", "no", NA, "no", "yes", NA, "yes", "no"),
        times = c(2, 1, 1, 1, 2, 4, 2, 4, 1)
      )),
      enter_market = factor(rep(
        c("no", "yes", "no", "yes", "no", "yes"),
        times = c(3, 1, 5, 2, 5, 2)
      ))
    )

    # Periods elapsed since the earliest period of each (id, plan_entry) group.
    dt[, time_to_entry_with_plan := period - min(period),
       by = c("id", "plan_entry")]

    # Periods elapsed since the earliest period of each (id, enter_market) group.
    dt[, time_to_entry_without_plan := period - min(period),
       by = c("id", "enter_market")]

    # Entry rows take the elapsed time that matches their plan status; every
    # other row (including rows with a missing plan_entry) falls back to 0.
    dt[, time_to_entry := fcase(
      enter_market == "yes" & plan_entry == "yes", time_to_entry_with_plan,
      enter_market == "yes" & plan_entry == "no", time_to_entry_without_plan,
      default = 0
    )]
    
         id period plan_entry enter_market time_to_entry_with_plan time_to_entry_without_plan time_to_entry
     1: C001      1       <NA>           no                       0                          0             0
     2: C001      2       <NA>           no                       1                          1             0
     3: C001      3        yes           no                       0                          2             0
     4: C002      2         no          yes                       0                          0             0
     5: C003      1       <NA>           no                       0                          0             0
     6: C003      4         no           no                       0                          3             0
     7: C003      5         no           no                       1                          4             0
     8: C003      6        yes           no                       0                          5             0
     9: C003     10        yes           no                       4                          9             0
    10: C004      3        yes          yes                       0                          0             0
    11: C004      4        yes          yes                       1                          1             1
    12: C005      2       <NA>           no                       0                          0             0
    13: C005      3       <NA>           no                       1                          1             0
    14: C005      4        yes           no                       0                          2             0
    15: C005      7        yes           no                       3                          5             0
    16: C005      8        yes           no                       4                          6             0
    17: C005      9        yes          yes                       5                          0             5
    18: C005     10         no          yes                       0                          1             1
    
    # Compute, for every market-entry row, the time elapsed since the matching
    # plan to enter. Adds prev_entry, plan_period and time_to_entry to DT by
    # reference.
    # NOTE(review): mult="last"/"first" select by row order, so this presumably
    # expects DT to be sorted by period within id -- confirm.
    #
    # Step 1: for each row with enter_market == "yes", find the latest earlier
    # entry period of the same id (self non-equi join: matched period < current
    # period). x.period returns the period of the matched row; rows without an
    # earlier entry get 0L.
    DT[enter_market=="yes", prev_entry := 
        fcoalesce(.SD[.SD, on=.(id, period<period), mult="last", x.period], 0L)
    ]
    
    # Step 2: non-equi join against the plan_entry == "yes" rows to find the
    # first plan period in [prev_entry, period), i.e. at or after the previous
    # entry and strictly before the current entry. NA when no such plan exists.
    DT[enter_market=="yes", plan_period :=
        DT[plan_entry=="yes"][.SD, on=.(id, period>=prev_entry, period<period), mult="first", x.period]
    ]
    
    # Step 3: time_to_entry is the distance from the matched plan period to the
    # current period; rows with no matched plan_period (NA) are set to 0.
    # NOTE(review): the 0L fallbacks presumably require period to be an integer
    # column -- confirm.
    DT[, time_to_entry := fcoalesce(period - plan_period, 0L)]
    
    # Print the result
    DT
    
    # Example panel data with plain character columns and an integer period.
    DT <- data.table(
      id = rep(
        c("C001", "C002", "C003", "C004", "C005"),
        times = c(3, 1, 5, 2, 7)
      ),
      period = c(
        1L, 2L, 3L, 2L, 1L, 4L, 5L, 6L, 10L,
        3L, 4L, 2L, 3L, 4L, 7L, 8L, 9L, 10L
      ),
      # plan_entry contains missing values on purpose
      plan_entry = rep(
        c(NA, "yes", "no", NA, "no", "yes", NA, "yes", "no"),
        times = c(2, 1, 1, 1, 2, 4, 2, 4, 1)
      ),
      enter_market = rep(
        c("no", "yes", "no", "yes", "no", "yes"),
        times = c(3, 1, 5, 2, 5, 2)
      )
    )