R 基于另一个数据帧在数据帧中插入新行

R 基于另一个数据帧在数据帧中插入新行,r,dplyr,data.table,R,Dplyr,Data.table,样本数据 dat <- data.table(yr = c(2013,2013,2013,2013,2013,2013,2013,2013,2013,2013,2012,2012,2012,2012,2012,2012,2012,2012,2012,2012,2012), location = c("Bh","Bh","Bh","Bh","Bh","Go","Go","Go","Go","Go","Bh"

样本数据

# Sample observations: 21 rows made of four (yr, location) groups.
# Group sizes, in order: 2013/Bh = 5, 2013/Go = 5, 2012/Bh = 6, 2012/Go = 5.
dat <- data.table(
  yr = rep(c(2013, 2012), times = c(10, 11)),
  location = rep(c("Bh", "Go", "Bh", "Go"), times = c(5, 5, 6, 5)),
  time.period = paste0("t", c(4:8, 3:7, 3:8, 3:7)),
  # as.numeric() keeps period a double column, as in the literal version.
  period = as.numeric(c(20:24, 19:23, 19:24, 19:23)),
  value = runif(21)
)

# Lookup table: every time.period label (t1..t10) with its period number.
key <- data.table(
  time.period = paste0("t", 1:10),
  period = as.numeric(17:26)
)
我想做:

            yr    location    time.period  period      value
        1: 2013       Bh          t1        17       0
        1: 2013       Bh          t2        18       0
        1: 2013       Bh          t3        19       0
        1: 2013       Bh          t4        20       0.7167561
        2: 2013       Bh          t5        21       0.5659722
        3: 2013       Bh          t6        22       0.8549229
        4: 2013       Bh          t7        23       0.1046213
        5: 2013       Bh          t8        24       0.8144670
        1: 2013       Bh          t9        25       0
        1: 2013       Bh          t10       26       0
我试过这个:

   # Attempt: within each (location, yr) group, add a row for every period
   # from 17 to 26 that is missing.
   dat %>%
     group_by(location, yr) %>%
     complete(period = seq(from = 17, to = 26, by = 1L))

   A tibble: 40 x 5
   Groups:   location, yr [4]
          location    yr period time.period      value
             <chr>   <dbl>  <dbl>       <chr>      <dbl>
     1       Bh      2012     17        <NA>         NA
     2       Bh      2012     18        <NA>         NA
     3       Bh      2012     19          t3 0.46757583
     4       Bh      2012     20          t4 0.07041745
     5       Bh      2012     21          t5 0.58707367
     6       Bh      2012     22          t6 0.83271673
     7       Bh      2012     23          t7 0.76918731
     8       Bh      2012     24          t8 0.25368225
     9       Bh      2012     25        <NA>         NA
    10       Bh      2012     26        <NA>         NA
    # ... with 30 more rows

正如您所看到的,新增行中的 time.period 列没有被填充(仍然是 NA)。我该如何填充该列?

您需要这样的东西吗

# Right join: keep every row of `key`, even when `dat` has no matching row.
# Merging on both shared columns avoids duplicated period.x / period.y columns,
# and TRUE is used instead of the reassignable shorthand T.
x <- merge(dat, key, by = c("time.period", "period"), all.y = TRUE)
# Replace every NA produced by the join (in any column) with 0.
x[is.na(x)] <- 0

你需要这样的东西吗

# Right join: keep every row of `key`, even when `dat` has no matching row.
# Merging on both shared columns avoids duplicated period.x / period.y columns,
# and TRUE is used instead of the reassignable shorthand T.
x <- merge(dat, key, by = c("time.period", "period"), all.y = TRUE)
# Replace every NA produced by the join (in any column) with 0.
x[is.na(x)] <- 0
由于您使用的是data.table,因此可以执行以下操作:

# For every (location, yr) group, join that group's rows (.SD) onto `key`,
# so each group ends up with one row per time.period listed in `key`.
dat_new <- dat[, .SD[key, on = "time.period"], by = .(location, yr)]
# The join brings key's period in as `i.period`; copy it over `period`
# and then drop the helper column.
dat_new[, period := i.period][, i.period := NULL]
# Rows added by the join have no observation, so set their value to 0.
dat_new[is.na(value), value := 0]

# Show the first 10 rows; the 10 is an argument of head(), not of print().
print(head(dat_new, 10))

    location   yr time.period period     value
 1:       Bh 2013          t1     17 0.0000000
 2:       Bh 2013          t2     18 0.0000000
 3:       Bh 2013          t3     19 0.0000000
 4:       Bh 2013          t4     20 0.9255600
 5:       Bh 2013          t5     21 0.3816035
 6:       Bh 2013          t6     22 0.5202268
 7:       Bh 2013          t7     23 0.5326466
 8:       Bh 2013          t8     24 0.5091590
 9:       Bh 2013          t9     25 0.0000000
10:       Bh 2013         t10     26 0.0000000
说明:

1. 首先,按 dat 中的每个 (location, yr) 分组,把 key 数据表与该组的数据按 time.period 连接起来。 2. 连接之后,key 中的 period 列会以 i.period 的名称加入结果。 3. 最后,用 period := i.period 覆盖 period 列并删除 i.period 列,再把 value 中的 NA 设置为 0。

由于您使用的是data.table,因此可以执行以下操作:

# For every (location, yr) group, join that group's rows (.SD) onto `key`,
# so each group ends up with one row per time.period listed in `key`.
dat_new <- dat[, .SD[key, on = "time.period"], by = .(location, yr)]
# The join brings key's period in as `i.period`; copy it over `period`
# and then drop the helper column.
dat_new[, period := i.period][, i.period := NULL]
# Rows added by the join have no observation, so set their value to 0.
dat_new[is.na(value), value := 0]

# Show the first 10 rows; the 10 is an argument of head(), not of print().
print(head(dat_new, 10))

    location   yr time.period period     value
 1:       Bh 2013          t1     17 0.0000000
 2:       Bh 2013          t2     18 0.0000000
 3:       Bh 2013          t3     19 0.0000000
 4:       Bh 2013          t4     20 0.9255600
 5:       Bh 2013          t5     21 0.3816035
 6:       Bh 2013          t6     22 0.5202268
 7:       Bh 2013          t7     23 0.5326466
 8:       Bh 2013          t8     24 0.5091590
 9:       Bh 2013          t9     25 0.0000000
10:       Bh 2013         t10     26 0.0000000
说明:

1. 首先,按 dat 中的每个 (location, yr) 分组,把 key 数据表与该组的数据按 time.period 连接起来。 2. 连接之后,key 中的 period 列会以 i.period 的名称加入结果。 3. 最后,用 period := i.period 覆盖 period 列并删除 i.period 列,再把 value 中的 NA 设置为 0。

tidyr::complete可用于找到解决方案

library(dplyr)
library(tidyr)

# Expand dat to every combination of yr, location and the rows of `key`
# (time.period / period pairs); rows that did not exist get value = 0.
# The stray trailing ")" of the original call was a syntax error.
dat %>%
  complete(yr, location, key, fill = list(value = 0))

# # A tibble: 40 x 5
#    yr  location time.period period  value
#   <dbl> <chr>    <chr>        <dbl> <dbl>
# 1  2012 Bh       t1            17.0 0    
# 2  2012 Bh       t2            18.0 0    
# 3  2012 Bh       t3            19.0 0.177
# 4  2012 Bh       t4            20.0 0.687
# 5  2012 Bh       t5            21.0 0.384
# 6  2012 Bh       t6            22.0 0.770
# 7  2012 Bh       t7            23.0 0.498
# 8  2012 Bh       t8            24.0 0.718
# 9  2012 Bh       t9            25.0 0    
# 10  2012 Bh       t10           26.0 0    
# # ... with 30 more rows
资料

tidyr::complete可用于找到解决方案

library(dplyr)
library(tidyr)

# Expand dat to every combination of yr, location and the rows of `key`
# (time.period / period pairs); rows that did not exist get value = 0.
# The stray trailing ")" of the original call was a syntax error.
dat %>%
  complete(yr, location, key, fill = list(value = 0))

# # A tibble: 40 x 5
#    yr  location time.period period  value
#   <dbl> <chr>    <chr>        <dbl> <dbl>
# 1  2012 Bh       t1            17.0 0    
# 2  2012 Bh       t2            18.0 0    
# 3  2012 Bh       t3            19.0 0.177
# 4  2012 Bh       t4            20.0 0.687
# 5  2012 Bh       t5            21.0 0.384
# 6  2012 Bh       t6            22.0 0.770
# 7  2012 Bh       t7            23.0 0.498
# 8  2012 Bh       t8            24.0 0.718
# 9  2012 Bh       t9            25.0 0    
# 10  2012 Bh       t10           26.0 0    
# # ... with 30 more rows
资料


似乎与这几个帖子(“链接”中的帖子)有些相关。谢谢。我已经尝试过这些解决方案。我还有一个问题,我已经编辑了这个问题。似乎与这几个帖子(“链接”中的帖子)有些相关。谢谢。我已经尝试过这些解决方案。我还有一个问题,我已经编辑了这个问题。@Crop89 我真的很乐意帮忙。感谢您提供示例数据和解决方案。这对找到解决办法很有帮助 :-) @Crop89 我真的很乐意帮忙。感谢您提供示例数据和解决方案。这对找到解决方案很有帮助 :-)