R 使用两个现有数据帧创建附加数据帧_R_Dplyr

R 使用两个现有数据帧创建附加数据帧

R 使用两个现有数据帧创建附加数据帧,r,dplyr,R,Dplyr,输入数据帧数据帧1（示例-nrow=100）数据帧2（示例-nrow=200）我想创建第三个数据集，其中包含在数据框2中为Col a条目找到的每个附加行输出文件（nrow=200-100=100）您可以将行号添加到每个数据帧，然后执行反连接操作： library(tidyverse) df2 %>% group_by(colA) %>% mutate(rn = row_number()) %>% anti_join(df1 %>% group_by

输入数据帧

数据帧1（示例-nrow=100）

数据帧2（示例-nrow=200）

我想创建第三个数据集，其中包含数据帧2中每个 ColA 取值相对于数据帧1多出来的所有行

输出文件（nrow=200-100=100）

您可以先按组为每个数据帧添加行号，然后执行反连接（anti-join）操作：

library(tidyverse)

# Number the rows inside every colA group of both data frames, then keep the
# rows of df2 whose (colA, rn) pair has no counterpart in df1. Those are the
# rows df2 has in excess of df1 for each colA value.
# `by` is spelled out on purpose: without it anti_join() matches on every
# column name the two data frames share, which silently changes the result
# as soon as df1 and df2 have other columns in common.
# The result is still grouped by colA; add ungroup() if that is not wanted.
df2 %>%
  group_by(colA) %>%
  mutate(rn = row_number()) %>%
  anti_join(
    df1 %>%
      group_by(colA) %>%
      mutate(rn = row_number()),
    by = c("colA", "rn")
  ) %>%
  select(-rn)

输出

# A tibble: 2 x 3
# Groups:   colA [2]
  colA   colD  colE
  <chr> <dbl> <dbl>
1 a        51    63
2 b        11    13

# A tibble: 2 x 3
# Groups:   colA [2]
  colA   colD  colE
  <chr> <dbl> <dbl>
1 a        51    63
2 b        11    13

如果需要用循环来实现，先用第二个数据集的列名创建一个空数据集。然后遍历第二个数据集中 “ColA” 的 unique 值：用 subset 取出 df2 中对应的行，计算该子集与 “df1” 中对应行的行数之差（“cnt”），再用 rbind 把该子集的 tail（即最后 cnt 行）追加到 “out” 中。

# // Collect, for every ColA value in df2, the rows df2 has beyond those in df1.
# // Start from a zero-row copy of df2 so the result always carries df2's
# // columns and types, even when nothing is left over.
out <- df2[0, , drop = FALSE]
# // Get the unique values of the column
un1 <- unique(df2$ColA)
# // Build one piece per unique value instead of growing `out` inside a loop
extra_rows <- lapply(un1, function(un) {
  # // rows of df2 carrying this value (%in% never yields NA, unlike ==)
  tmp <- df2[df2$ColA %in% un, , drop = FALSE]
  # // surplus row count; clamped at 0 because a negative n makes tail()
  # // drop leading rows instead of returning nothing
  cnt <- max(nrow(tmp) - sum(df1$ColA %in% un), 0L)
  # // the last `cnt` rows of the subset are the surplus ones
  tail(tmp, cnt)
})
# // bind the template and all pieces in a single rbind call
out <- do.call(rbind, c(list(out), extra_rows))

row.names(out) <- NULL
out
#  ColA ColD ColE
#1    a   51   63
#2    b   11   13

创建函数

#' Rows of `data2` that are surplus relative to `data1`
#'
#' For every distinct combination of the `by_cols` values found in `data2`,
#' counts the rows carrying that combination in each data frame and keeps the
#' trailing rows of `data2` that exceed the count found in `data1`.
#'
#' @param data1 Reference data frame.
#' @param data2 Data frame whose surplus rows are returned.
#' @param by_cols Columns (valid for both data frames) whose combined values
#'   identify matching rows.
#' @return A data frame with the columns of `data2` and default row names.
#'   Rows are grouped by key combination in order of first appearance in
#'   `data2`; zero rows when `data2` has no surplus.
f1 <- function(data1, data2, by_cols) {
  stopifnot(is.data.frame(data1), is.data.frame(data2))

  # // One key string per row. The columns are passed unnamed so a column
  # // called "sep"/"collapse" cannot be taken for a paste() argument, and
  # // the unit-separator keeps "a b" + "c" distinct from "a" + "b c".
  # // Keys live in local vectors, so no helper column is added to the data.
  make_key <- function(data) {
    do.call(paste, c(unname(as.list(data[by_cols])), sep = "\x1f"))
  }
  key1 <- make_key(data1)
  key2 <- make_key(data2)

  # // Distinct keys of data2, in order of first appearance
  keys <- unique(key2)
  # // Row positions of data2 per key, and how often each key occurs in data1
  rows_by_key <- split(
    seq_along(key2),
    factor(match(key2, keys), levels = seq_along(keys))
  )
  n_in_data1 <- tabulate(match(key1, keys), nbins = length(keys))

  # // Keep the last (n2 - n1) rows per key; clamp at 0 because a negative n
  # // makes tail() drop leading rows instead of returning nothing.
  keep <- Map(
    function(rows, n1) tail(rows, max(length(rows) - n1, 0L)),
    rows_by_key,
    n_in_data1
  )

  # // A single subset replaces the repeated rbind() of the loop version
  out <- data2[unlist(keep, use.names = FALSE), , drop = FALSE]
  row.names(out) <- NULL
  out
}
# Example call: rows are matched on the combination of ColA and ColB.
# The commented lines below show the printed result.
f1(df1, df2, c("ColA", "ColB"))
# ColA ColB ColE
#1    a   31   41
#2    b   11   13

我们可以用 for 循环来完成这个任务吗？@AashayMehta 是的，但是你能在这里进一步描述你的需求吗？是否要按 ColA 分组循环，并比较两个数据帧？在比较两个数据帧时，是否需要包括任何其他操作？我想可能有多种方法可以满足您的需要。因为我的数据集中有多个列，df2（ncol=10）中的列名与 df1（ncol=100）中的列名相同，所以使用管道的方法会产生不同的输出。很抱歉，当我发布这个问题时，我应该更加小心地使用列名。此外，我自己也在尝试使用 for 循环来解决这个问题，因此我想学习使用 for 循环来解决这个问题。是的，我想通过 ColA 循环，但我不想在循环时执行任何附加操作。我只想用在 df2 中找到的附加条目创建一个新的 df。如果我们使用附加列（ColB）来选择我们在数据 “out” 中添加的行，我们需要在代码中做什么更改。现在，我们不再删除最后一行，而是删除 ColB 值不匹配的行。请参阅我在输入数据帧中对数据帧1和数据帧2所做的更改。@AashayMehta 请作为新问题发布
# A tibble: 2 x 3
# Groups:   colA [2]
  colA   colD  colE
  <chr> <dbl> <dbl>
1 a        51    63
2 b        11    13

# // Collect, for every ColA value in df2, the rows df2 has beyond those in df1.
# // Start from a zero-row copy of df2 so the result always carries df2's
# // columns and types, even when nothing is left over.
out <- df2[0, , drop = FALSE]
# // Get the unique values of the column
un1 <- unique(df2$ColA)
# // Build one piece per unique value instead of growing `out` inside a loop
extra_rows <- lapply(un1, function(un) {
  # // rows of df2 carrying this value (%in% never yields NA, unlike ==)
  tmp <- df2[df2$ColA %in% un, , drop = FALSE]
  # // surplus row count; clamped at 0 because a negative n makes tail()
  # // drop leading rows instead of returning nothing
  cnt <- max(nrow(tmp) - sum(df1$ColA %in% un), 0L)
  # // the last `cnt` rows of the subset are the surplus ones
  tail(tmp, cnt)
})
# // bind the template and all pieces in a single rbind call
out <- do.call(rbind, c(list(out), extra_rows))

row.names(out) <- NULL
out
#  ColA ColD ColE
#1    a   51   63
#2    b   11   13

# Example inputs that share the columns ColA and ColB.
# df1: four rows
df1 <- data.frame(
  ColA = c("a", "a", "b", "c"),
  ColB = c(1, 3, 5, 9),
  ColC = c(2, 4, 6, 10)
)
# df2: six rows
df2 <- data.frame(
  ColA = c("a", "a", "a", "b", "b", "c"),
  ColB = c(1, 31, 3, 5, 11, 9),
  ColE = c(22, 41, 63, 6, 13, 20)
)

#' Rows of `data2` that are surplus relative to `data1`
#'
#' For every distinct combination of the `by_cols` values found in `data2`,
#' counts the rows carrying that combination in each data frame and keeps the
#' trailing rows of `data2` that exceed the count found in `data1`.
#'
#' @param data1 Reference data frame.
#' @param data2 Data frame whose surplus rows are returned.
#' @param by_cols Columns (valid for both data frames) whose combined values
#'   identify matching rows.
#' @return A data frame with the columns of `data2` and default row names.
#'   Rows are grouped by key combination in order of first appearance in
#'   `data2`; zero rows when `data2` has no surplus.
f1 <- function(data1, data2, by_cols) {
  stopifnot(is.data.frame(data1), is.data.frame(data2))

  # // One key string per row. The columns are passed unnamed so a column
  # // called "sep"/"collapse" cannot be taken for a paste() argument, and
  # // the unit-separator keeps "a b" + "c" distinct from "a" + "b c".
  # // Keys live in local vectors, so no helper column is added to the data.
  make_key <- function(data) {
    do.call(paste, c(unname(as.list(data[by_cols])), sep = "\x1f"))
  }
  key1 <- make_key(data1)
  key2 <- make_key(data2)

  # // Distinct keys of data2, in order of first appearance
  keys <- unique(key2)
  # // Row positions of data2 per key, and how often each key occurs in data1
  rows_by_key <- split(
    seq_along(key2),
    factor(match(key2, keys), levels = seq_along(keys))
  )
  n_in_data1 <- tabulate(match(key1, keys), nbins = length(keys))

  # // Keep the last (n2 - n1) rows per key; clamp at 0 because a negative n
  # // makes tail() drop leading rows instead of returning nothing.
  keep <- Map(
    function(rows, n1) tail(rows, max(length(rows) - n1, 0L)),
    rows_by_key,
    n_in_data1
  )

  # // A single subset replaces the repeated rbind() of the loop version
  out <- data2[unlist(keep, use.names = FALSE), , drop = FALSE]
  row.names(out) <- NULL
  out
}
# Example call: rows are matched on the combination of ColA and ColB.
# The commented lines below show the printed result.
f1(df1, df2, c("ColA", "ColB"))
# ColA ColB ColE
#1    a   31   41
#2    b   11   13

# Example inputs in dput() form: plain lists given the data.frame class and
# compact integer row names.
# df1: four rows, columns ColA / ColB / ColC
df1 <- structure(
  list(
    ColA = c("a", "a", "b", "c"),
    ColB = c(1, 3, 5, 9),
    ColC = c(2, 4, 6, 10)
  ),
  class = "data.frame",
  row.names = c(NA, -4L)
)

# df2: six rows, columns ColA / ColD / ColE
df2 <- structure(
  list(
    ColA = c("a", "a", "a", "b", "b", "c"),
    ColD = c(12, 31, 51, 71, 11, 93),
    ColE = c(22, 41, 63, 86, 13, 20)
  ),
  class = "data.frame",
  row.names = c(NA, -6L)
)