Warning: file_get_contents(/data/phpspider/zhask/data//catemap/0/asp.net-mvc/15.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
更新R中存在的行并添加R中不存在的行_R - Fatal编程技术网

更新R中存在的行并添加R中不存在的行

更新R中存在的行并添加R中不存在的行,r,R,我想知道如何更新这些数据,我的想法是我有一个历史数据库df1 身份证件 起始日期 结束日期 宠物 1. 10-04-2021 16-04-2021 猫 2. 21-04-2021 马 3. 10-04-2021 19-04-2021 狗 4. 03-04-2021 15-04-2021 狗 5. 25-04-2021 鱼 6. 06-04-2021 16-04-2021 猫 7. 22-04-2021 马 8. 11-04-2021 21-04-2021 鱼 我们可以将数据集绑定在一起,进行排列

我想知道如何更新这些数据,我的想法是我有一个历史数据库df1

ID  起始日期    结束日期    宠物
1   10-04-2021  16-04-2021  猫
2   21-04-2021              马
3   10-04-2021  19-04-2021  狗
4   03-04-2021  15-04-2021  狗
5   25-04-2021              鱼
6   06-04-2021  16-04-2021  猫
7   22-04-2021              马
8   11-04-2021  21-04-2021  鱼
我们可以将两个数据集绑定在一起,先进行排列(arrange),再按列分组,并切片(slice)取出每组的第一行


# Stack the historical table and the update table, order every
# (ID, StartDate, Pet) group so that rows with a non-missing EndDate come
# first, then keep only the first row of each group.
library(dplyr)
df1 %>%
  bind_rows(df2) %>%
  arrange(ID, StartDate, Pet, is.na(EndDate)) %>%
  group_by(ID, StartDate, Pet) %>%
  slice(1L) %>%
  ungroup()
-输出

# A tibble: 10 x 4
#      ID StartDate  EndDate    Pet  
#   <int> <chr>      <chr>      <chr>
# 1     1 10-04-2021 16-04-2021 Cat  
# 2     2 21-04-2021 22-04-2021 Horse
# 3     3 10-04-2021 19-04-2021 Dog  
# 4     4 03-04-2021 15-04-2021 Dog  
# 5     5 25-04-2021 27-04-2021 Fish 
# 6     6 06-04-2021 16-04-2021 Cat  
# 7     7 22-04-2021 <NA>       Horse
# 8     8 11-04-2021 21-04-2021 Fish 
# 9     9 25-04-2021 29-04-2021 Dog  
#10    10 27-04-2021 30-04-2021 Dog  
数据
df1

这里是一个 dplyr 解决方案:

library(dplyr)

# Update df1 with df2: records are matched on ID, StartDate and Pet; an
# EndDate supplied by df2 takes precedence over the one stored in df1, and
# records that exist only in df2 are appended.
#
# The earlier `summarise_each(funs(na.omit))` version relied on deprecated
# dplyr functions and applied `na.omit()` column by column within each ID.
# That duplicated the IDs present in both tables (2 and 5) and dropped ID 7,
# whose only EndDate is NA -- this is what the 11-row output printed after
# this snippet shows. For the sample data this version returns 10 rows, one
# per ID, and keeps ID 7 with a missing EndDate.
df1 %>%
  # Explicit keys: EndDate is the column being updated, so it must not be
  # part of the join condition; df2's copy arrives as `EndDate_new`.
  full_join(df2, by = c("ID", "StartDate", "Pet"), suffix = c("", "_new")) %>%
  # Prefer the updated EndDate, fall back to the historical one.
  mutate(EndDate = coalesce(EndDate_new, EndDate)) %>%
  select(-EndDate_new) %>%
  arrange(ID)
输出:

      ID StartDate  EndDate    Pet  
   <int> <chr>      <chr>      <chr>
 1     1 10-04-2021 16-04-2021 Cat  
 2     2 21-04-2021 22-04-2021 Horse
 3     2 21-04-2021 22-04-2021 Horse
 4     3 10-04-2021 19-04-2021 Dog  
 5     4 03-04-2021 15-04-2021 Dog  
 6     5 25-04-2021 27-04-2021 Fish 
 7     5 25-04-2021 27-04-2021 Fish 
 8     6 06-04-2021 16-04-2021 Cat  
 9     8 11-04-2021 21-04-2021 Fish 
10     9 25-04-2021 29-04-2021 Dog  
11    10 27-04-2021 30-04-2021 Dog 
      ID StartDate  EndDate    Pet
 1     1 10-04-2021 16-04-2021 Cat
 2     2 21-04-2021 22-04-2021 Horse
 3     2 21-04-2021 22-04-2021 Horse
 4     3 10-04-2021 19-04-2021 Dog
 5     4 03-04-2021 15-04-2021 Dog
 6     5 25-04-2021 27-04-2021 Fish
 7     5 25-04-2021 27-04-2021 Fish
 8     6 06-04-2021 16-04-2021 Cat
 9     8 11-04-2021 21-04-2021 Fish
10     9 25-04-2021 29-04-2021 Dog
11    10 27-04-2021 30-04-2021 Dog
数据:


df1

组合两个数据集,进行排列(arrange),并按 ID 保留唯一的行

library(dplyr)

# Stack both tables, sort within each ID so that rows with a non-missing
# EndDate lead, then drop every row whose ID has already been seen.
bind_rows(df1, df2) %>%
  arrange(ID, is.na(EndDate)) %>%
  filter(!duplicated(ID))

#   ID  StartDate    EndDate   Pet
#1   1 10-04-2021 16-04-2021   Cat
#2   2 21-04-2021 22-04-2021 Horse
#3   3 10-04-2021 19-04-2021   Dog
#4   4 03-04-2021 15-04-2021   Dog
#5   5 25-04-2021 27-04-2021  Fish
#6   6 06-04-2021 16-04-2021   Cat
#7   7 22-04-2021       <NA> Horse
#8   8 11-04-2021 21-04-2021  Fish
#9   9 25-04-2021 29-04-2021   Dog
#10 10 27-04-2021 30-04-2021   Dog
library(dplyr)

df1 %>%
  bind_rows(df2) %>%
  arrange(ID, is.na(EndDate)) %>%
  distinct(ID, .keep_all = TRUE)

#   ID  StartDate    EndDate   Pet
#1   1 10-04-2021 16-04-2021   Cat
#2   2 21-04-2021 22-04-2021 Horse
#3   3 10-04-2021 19-04-2021   Dog
#4   4 03-04-2021 15-04-2021   Dog
#5   5 25-04-2021 27-04-2021  Fish
#6   6 06-04-2021 16-04-2021   Cat
#7   7 22-04-2021       <NA> Horse
#8   8 11-04-2021 21-04-2021  Fish
#9   9 25-04-2021 29-04-2021   Dog
#10 10 27-04-2021 30-04-2021   Dog
在 base R 中:

# Base R variant: stack both tables, sort by ID with non-missing EndDate
# first, then keep the first row found for each ID.
df3 <- rbind(df1, df2)
df3 <- df3[order(df3$ID, is.na(df3$EndDate)), ]
# match() returns the position of the first occurrence of every unique ID.
df3 <- df3[match(unique(df3$ID), df3$ID), ]

df3

请分享您的数据集,以及您尝试过的代码,这样我才能知道您哪里做错了。
# Historical table, built column by column: IDs 1-8, dates kept as
# "dd-mm-yyyy" strings, EndDate missing for IDs 2, 5 and 7.
df1 <- tibble::tibble(
  ID = 1:8,
  StartDate = c(
    "10-04-2021", "21-04-2021", "10-04-2021", "03-04-2021",
    "25-04-2021", "06-04-2021", "22-04-2021", "11-04-2021"
  ),
  EndDate = c(
    "16-04-2021", NA, "19-04-2021", "15-04-2021",
    NA, "16-04-2021", NA, "21-04-2021"
  ),
  Pet = c("Cat", "Horse", "Dog", "Dog", "Fish", "Cat", "Horse", "Fish")
)


# Update table, built column by column: IDs 2 and 5 also appear in df1,
# IDs 9 and 10 do not; every row has an EndDate.
df2 <- tibble::tibble(
  ID = c(2L, 5L, 9L, 10L),
  StartDate = c("21-04-2021", "25-04-2021", "25-04-2021", "27-04-2021"),
  EndDate = c("22-04-2021", "27-04-2021", "29-04-2021", "30-04-2021"),
  Pet = c("Horse", "Fish", "Dog", "Dog")
)
library(dplyr)

# Combine the historical and update tables, then keep a single row per ID,
# preferring the row whose EndDate is not missing.
df1 %>%
  bind_rows(df2) %>%
  arrange(ID, is.na(EndDate)) %>%
  group_by(ID) %>%
  slice(1L) %>%
  ungroup()

#   ID  StartDate    EndDate   Pet
#1   1 10-04-2021 16-04-2021   Cat
#2   2 21-04-2021 22-04-2021 Horse
#3   3 10-04-2021 19-04-2021   Dog
#4   4 03-04-2021 15-04-2021   Dog
#5   5 25-04-2021 27-04-2021  Fish
#6   6 06-04-2021 16-04-2021   Cat
#7   7 22-04-2021       <NA> Horse
#8   8 11-04-2021 21-04-2021  Fish
#9   9 25-04-2021 29-04-2021   Dog
#10 10 27-04-2021 30-04-2021   Dog
# Base R: append df2 to df1, order by ID with non-missing EndDate first,
# and retain only the first row encountered for each ID.
df3 <- rbind(df1, df2)
df3 <- df3[order(df3[["ID"]], is.na(df3[["EndDate"]])), ]
df3 <- df3[which(!duplicated(df3[["ID"]])), ]