R中的条件交叉

R中的条件交叉,r,conditional,dplyr,intersection,R,Conditional,Dplyr,Intersection,希望筛选出两个表格相交条件1中的动物,并在表格相交条件2的相同类别中共享相同大小的动物。知道一种有效的编码方法吗?例如,使用dplyr library(dplyr) animal1 <- data.frame(type = c("cat", "dog", "dog","bird", "elephant"), size = c("small","large","small", "medium", "large"), tableName = rep

希望先筛选出同时出现在两个表中的动物类型(交集条件1),再从中只保留在两个表里同一类型下大小也相同的动物(交集条件2)。有没有高效的编码方法?例如使用 dplyr:

library(dplyr)
# Example table 1: one row per animal, tagged with the table it came from.
animal1 <- data.frame(
  type = c("cat", "dog", "dog", "bird", "elephant"),
  size = c("small", "large", "small", "medium", "large"),
  tableName = rep("animal1", 5),
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently turn the columns into factors.
  stringsAsFactors = FALSE
)
#       type   size tableName
# 1      cat  small   animal1
# 2      dog  large   animal1
# 3      dog  small   animal1
# 4     bird medium   animal1
# 5 elephant  large   animal1

# Example table 2: same columns, different observations.
animal2 <- data.frame(
  type = c("elephant", "dog", "dog", "elephant", "elephant"),
  size = c("medium", "large", "large", "small", "large"),
  tableName = rep("animal2", 5),
  stringsAsFactors = FALSE
)
#       type   size tableName
# 1 elephant medium   animal2
# 2      dog  large   animal2
# 3      dog  large   animal2
# 4 elephant  small   animal2
# 5 elephant  large   animal2

# Stack both tables; rows 1-5 come from animal1 and rows 6-10 from animal2, so
# the row number identifies the original observation.
rbindAnimal <- rbind(animal1, animal2)
#        type   size tableName
# 1       cat  small   animal1
# 2       dog  large   animal1
# 3       dog  small   animal1
# 4      bird medium   animal1
# 5  elephant  large   animal1
# 6  elephant medium   animal2
# 7       dog  large   animal2
# 8       dog  large   animal2
# 9  elephant  small   animal2
# 10 elephant  large   animal2

# Criterion 1: animal types that occur in both source tables.
intersectType <- rbindAnimal %>%
  filter(tableName == "animal1") %>%
  select(type) %>%
  intersect(
    rbindAnimal %>%
      filter(tableName == "animal2") %>%
      select(type)
  )
#       type
# 1 elephant
# 2      dog

# Keep only the rows whose type is in that intersection. Logical subsetting
# retains the original row labels, which is what the question relies on.
rbindAnimal <- rbindAnimal[rbindAnimal$type %in% intersectType$type, ]

#        type   size tableName
# 2       dog  large   animal1
# 3       dog  small   animal1
# 5  elephant  large   animal1
# 6  elephant medium   animal2
# 7       dog  large   animal2
# 8       dog  large   animal2
# 9  elephant  small   animal2
# 10 elephant  large   animal2

# Still missing (criterion 2): only the rows whose size is also shared across
# both tables within the same type should remain.
# Needs to return row numbers! Here: 2,5,7,8, and 10
#        type   size tableName
# 2       dog  large   animal1
# 5  elephant  large   animal1
# 7       dog  large   animal2
# 8       dog  large   animal2
# 10 elephant  large   animal2

解决方案:使用 merge / semi_join / anti_join。感谢 @Imo 关于 merge 的提示。

library(dplyr)
# Example table 1: one row per animal, tagged with the table it came from.
animal1 <- data.frame(
  type = c("cat", "dog", "dog", "bird", "elephant"),
  size = c("small", "large", "small", "medium", "large"),
  tableName = rep("animal1", 5),
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently turn the columns into factors.
  stringsAsFactors = FALSE
)
#       type   size tableName
# 1      cat  small   animal1
# 2      dog  large   animal1
# 3      dog  small   animal1
# 4     bird medium   animal1
# 5 elephant  large   animal1

# Example table 2: same columns, different observations.
animal2 <- data.frame(
  type = c("elephant", "dog", "dog", "elephant", "elephant"),
  size = c("medium", "large", "large", "small", "large"),
  tableName = rep("animal2", 5),
  stringsAsFactors = FALSE
)
#       type   size tableName
# 1 elephant medium   animal2
# 2      dog  large   animal2
# 3      dog  large   animal2
# 4 elephant  small   animal2
# 5 elephant  large   animal2

# Stack both tables; rows 1-5 come from animal1 and rows 6-10 from animal2.
rbindAnimal <- rbind(animal1, animal2)
# Full outer join on (type, size): a combination found in only one table gets
# NA in the column contributed by the other table.
mergedAnimals <- merge(animal1, animal2, by = c("type", "size"), all = TRUE)

# Rows without any NA are the combinations present in both tables; reduce them
# to the distinct (type, size) keys.
sharedTypeSize <- mergedAnimals[complete.cases(mergedAnimals), ] %>%
  select(type, size) %>%
  unique()

# Join the keys back onto the stacked data, so sharedTypeSize holds every row
# of rbindAnimal whose (type, size) occurs in both tables.
sharedTypeSize <- merge(rbindAnimal, sharedTypeSize)

# Rows whose (type, size) is shared by both tables. The join keys are explicit
# so the result does not depend on which columns the two inputs have in common
# and no "Joining with `by = ...`" message is emitted.
# (Output as shown in the original post; row order may vary by dplyr version.)
semi_join(rbindAnimal, sharedTypeSize, by = c("type", "size"))
#        type  size tableName
# 1      dog large   animal1
# 2      dog large   animal2
# 3      dog large   animal2
# 4 elephant large   animal1
# 5 elephant large   animal2

# Reverse filter: rows whose (type, size) appears in only one of the tables.
anti_join(rbindAnimal, sharedTypeSize, by = c("type", "size"))

#       type   size tableName
# 1      cat  small   animal1
# 2      dog  small   animal1
# 3     bird medium   animal1
# 4 elephant medium   animal2
# 5 elephant  small   animal2
需要返回行号

使用 data.table 的特殊符号 .I(它保存行号)可以很容易地做到这一点:

library(data.table)
# Turn rbindAnimal into a data.table so the `[i, j, by]` syntax below applies.
setDT(rbindAnimal)

# For each (type, size) group, return its row numbers (.I) only when the group
# contains more than one distinct tableName, i.e. the combination occurs in
# both tables. The unnamed result column is read back as V1.
w <- rbindAnimal[, if (uniqueN(tableName) > 1L) .I, by=.(type, size)]$V1
# [1]  2  7  8  5 10
# Reverse filter: drop those rows, leaving the combinations seen in one table only.
# NOTE(review): confirm how negative indexing behaves here if w is ever empty.
rbindAnimal[-w]
#        type   size tableName
# 1:      cat  small   animal1
# 2:      dog  small   animal1
# 3:     bird medium   animal1
# 4: elephant medium   animal2
# 5: elephant  small   animal2
# dplyr version of the same reverse filter: keep the (type, size) groups whose
# rows all come from a single table.
rbindAnimal %>% group_by(type, size) %>% filter(n_distinct(tableName) == 1L)
#       type   size tableName
#      <chr>  <chr>     <chr>
# 1      cat  small   animal1
# 2      dog  small   animal1
# 3     bird medium   animal1
# 4 elephant medium   animal2
# 5 elephant  small   animal2

评论:所需的输出不清楚。您是想按类型和大小进行合并,还是只保留那些(类型,大小)组合没有同时出现在两个数据框中的观测值?
回复:问得好!我认为按类型和大小合并正是我的目标。最后几行显示了所需的输出;借助其中的行索引值,还可以进行反向筛选。