Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/72.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 将数据拆分为若干部分_R - Fatal编程技术网

R 将数据拆分为若干部分

R 将数据拆分为若干部分,r,R,下面是我的数据,我想根据ID将其拆分为几个部分 df1<- structure(list(Ids1 = 1:7, string1 = structure(c(3L, 2L, 4L, 1L, 1L, 1L, 1L), .Label = c("gdyijq,udyhfs,gqdtr", "hdydg", "hishsgd,gugddf", "ydis"), class = "factor"), Ids2 = c(1L, 3L, 4L, 9L, 10L, NA, NA), string2

下面是我的数据,我想根据ID将其拆分为几个部分

# Example data: two integer ID columns, each paired with a factor column of
# comma-separated strings. The last two rows of Ids2 are NA and the matching
# string2 entries are empty strings.
df1 <- data.frame(
  Ids1 = 1:7,
  string1 = factor(
    c("hishsgd,gugddf", "hdydg", "ydis", rep("gdyijq,udyhfs,gqdtr", 4L)),
    levels = c("gdyijq,udyhfs,gqdtr", "hdydg", "hishsgd,gugddf", "ydis")
  ),
  Ids2 = c(1L, 3L, 4L, 9L, 10L, NA, NA),
  string2 = factor(
    c("hishsgd,gugddf", "ydis", "gdyijq,udyhfs", "gqdtr", "nlrshf", "", ""),
    levels = c(
      "", "gdyijq,udyhfs", "gqdtr", "hishsgd,gugddf", "nlrshf", "ydis"
    )
  )
)
我是这样做的:

# Keep the rows where both IDs are equal on the same row. A comparison that
# yields NA (an ID is missing) counts as "no match", so such rows are dropped.
df.1 <- df1[(df1$Ids1 == df1$Ids2) %in% TRUE, ]
我还尝试了下面的写法,但同样不起作用:

# Keep the rows where the two IDs differ on the same row; rows whose
# comparison is NA are dropped. `!=` is symmetric, so df.2 and df.3 end up
# holding the same rows.
df.2 <- df1[(df1$Ids1 != df1$Ids2) %in% TRUE, ]
df.3 <- df1[(df1$Ids2 != df1$Ids1) %in% TRUE, ]
我还尝试了下面的写法,但同样不起作用:

# Row-wise inequality filter: a row is kept only when the comparison is
# TRUE, so rows with a missing ID are excluded. Swapping the operands of
# `!=` does not change the result, hence df.2 and df.3 are equal.
df.2 <- df1[(df1$Ids1 != df1$Ids2) %in% TRUE, ]
df.3 <- df1[(df1$Ids2 != df1$Ids1) %in% TRUE, ]

下面是一个基于 dplyr 包的连接(join)操作的解决方案:

library(dplyr)

# df.1: IDs that occur in both Ids1 and Ids2, with string1 and string2 placed
# side by side.
df.1 <- df1 %>%
  select(Ids1, string1) %>%
  inner_join(select(df1, Ids2, string2), by = c("Ids1" = "Ids2"))

# Similar: for each row, the number of comma-separated tokens of string1 that
# also occur among the tokens of string2.
df.1$Similar <- apply(df.1[, -1], 1, function(pair) {
  tokens1 <- strsplit(pair[1], ",")[[1]]
  tokens2 <- strsplit(pair[2], ",")[[1]]
  sum(tokens1 %in% tokens2)
})

# df.2: (Ids1, string1) rows whose ID has no match in Ids2.
df.2 <- df1 %>%
  select(Ids1, string1) %>%
  anti_join(select(df1, Ids2, string2), by = c("Ids1" = "Ids2"))

# df.3: (Ids2, string2) rows whose ID has no match in Ids1; rows containing
# an NA are removed afterwards.
df.3 <- df1 %>%
  select(Ids2, string2) %>%
  anti_join(select(df1, Ids1, string1), by = c("Ids2" = "Ids1"))
df.3 <- df.3[complete.cases(df.3), ]
库(dplyr)
df.1
library(dplyr)

# Matched part: IDs found in both ID columns, carrying both string columns.
df.1 <- df1 %>%
  select(Ids1, string1) %>%
  inner_join(select(df1, Ids2, string2), by = c("Ids1" = "Ids2"))

# Count, row by row, how many comma-separated pieces of string1 are also
# pieces of string2.
df.1$Similar <- apply(df.1[, -1], 1, function(pair) {
  pieces1 <- strsplit(pair[1], ",")[[1]]
  pieces2 <- strsplit(pair[2], ",")[[1]]
  sum(pieces1 %in% pieces2)
})

# Unmatched Ids1 side.
df.2 <- df1 %>%
  select(Ids1, string1) %>%
  anti_join(select(df1, Ids2, string2), by = c("Ids1" = "Ids2"))

# Unmatched Ids2 side, with rows that contain an NA dropped.
df.3 <- df1 %>%
  select(Ids2, string2) %>%
  anti_join(select(df1, Ids1, string1), by = c("Ids2" = "Ids1"))
df.3 <- df.3[complete.cases(df.3), ]
# Base-R version of the two anti-joins: keep the rows whose ID has no match
# in the other ID column, restricted to that side's two columns.
df.2 <- df1[is.na(match(df1$Ids1, df1$Ids2)), c("Ids1", "string1")]
df.3 <- df1[is.na(match(df1$Ids2, df1$Ids1)), c("Ids2", "string2")]
# Discard df.3 rows that contain an NA in any column.
df.3 <- df.3[rowSums(is.na(df.3)) == 0, ]