R 选择每行出现n次的(多个)整数

R 选择每行出现n次的(多个)整数,r,dataframe,data.table,plyr,R,Dataframe,Data.table,Plyr,我有一个data.frame,其中的数据条目以 1,2,3,10 这种格式输入。也就是说,它们是逗号分隔的整数,范围为0-20,不一定连续。目前,每一列都被当作因子(factor)处理。我有四个变量包含这些值,我想创建一个新变量,它只包含在四个变量中的三个变量里都出现的整数;如果没有任何整数出现三次,那么使用0 M1 M2 M3 M4 M_NEW 1 1,2 0 1 1 3,4 3,4 1,2,3,4 4

我有一个data.frame,其中的数据条目以 1,2,3,10 这种格式输入。也就是说,它们是逗号分隔的整数,范围为0-20,不一定连续。目前,每一列都被当作因子(factor)处理。我有四个变量包含这些值,我想创建一个新变量,它只包含在四个变量中的三个变量里都出现的整数;如果没有任何整数出现三次,那么使用0。

M1    M2      M3      M4      M_NEW
1     1,2     0        1       1
3,4   3,4   1,2,3,4    4       3,4
我不确定如何处理这些逗号分隔的整数。如果每个单元格只有单个整数,我可以这样做:

但这里每个单元格包含多个用逗号分隔的值,我不知道该从何处入手。

# Sample data and example output, as a dput() literal: a 14-row data.frame
# whose five columns are all factors. M1-M4 hold the input values; each
# label is a string of comma-separated integers (e.g. "1,2,3"). M_NEW is
# the example output column.
# NOTE(review): the M_NEW label "1,2,3," carries a trailing comma --
# presumably a typo in the hand-entered example output; confirm before
# comparing computed results against it.
df <- structure(list(M1 = structure(c(3L, 2L, 2L, 5L, 3L, 1L, 7L, 1L, 
8L, 1L, 3L, 4L, 3L, 6L), .Label = c("0", "1", "1,2", "1,2,3", 
"1,2,3,4", "1,2,3,4,5", "3,4,5,6,7", "6,7,8,9,10,11,12,13,14,15,16"
), class = "factor"), M2 = structure(c(5L, 2L, 2L, 4L, 4L, 1L, 
11L, 8L, 7L, 9L, 3L, 6L, 3L, 10L), .Label = c("0", "1,2", "1,2,3", 
"1,2,3,4", "1,2,3,4,5", "1,2,3,4,5,6,7", "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16", 
"2", "2,3,4,5", "4,5,6", "4,5,6,7,8,9,10,11,12,13,14"), class = "factor"), 
    M3 = structure(c(4L, 1L, 1L, 8L, 3L, 1L, 6L, 1L, 7L, 3L, 
    2L, 5L, 9L, 3L), .Label = c("0", "1,2", "1,2,3,4", "1,2,3,4,5", 
    "1,2,3,4,5,6", "1,2,3,4,5,6,7,8", "1,2,3,4,5,6,7,8,9,10,11,12,13,14", 
    "3,4", "3,4,5"), class = "factor"), M4 = structure(c(5L, 
    1L, 2L, 8L, 2L, 1L, 6L, 3L, 4L, 1L, 3L, 3L, 7L, 9L), .Label = c("0", 
    "1", "1,2", "1,2,3,4,5,12,13,14,15,16,17", "1,2,3,4,5,6", 
    "1,2,3,4,5,6,7,8,9,10,11,12", "3,4", "4", "4,5"), class = "factor"), 
    M_NEW = structure(c(6L, 1L, 2L, 8L, 3L, 1L, 9L, 1L, 7L, 1L, 
    3L, 4L, 5L, 10L), .Label = c("0", "1", "1,2", "1,2,3", "1,2,3,", 
    "1,2,3,4,5", "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16", "3,4", 
    "3,4,5,6,7,8", "4,5"), class = "factor")), .Names = c("M1", 
"M2", "M3", "M4", "M_NEW"), class = "data.frame", row.names = c(NA, 
-14L))

代码有效,谢谢。我有一个关于 res 的问题:res = [1] "\"0\",\"1,3,1,2,3,4,6,7,8,2,2\",3,4,5,6,7",其中的非整数值是什么?例如 res 中的 \"0\"。——反斜杠 \ 用于转义特殊字符,例如双引号 "。我的第一个猜测是,真实数据中的某些列不仅包含数字,还包含引号,例如 "0,1,2,3,4" 而不是 0,1,2,3,4。
# Sample data and example output, as a dput() literal: a 14-row data.frame
# whose five columns are all factors. M1-M4 hold the input values; each
# label is a string of comma-separated integers (e.g. "1,2,3"). M_NEW
# (column 5) is the example output column; it is overwritten by the
# apply() call further down.
# NOTE(review): the M_NEW label "1,2,3," carries a trailing comma --
# presumably a typo in the hand-entered example output.
df <- structure(list(M1 = structure(c(3L, 2L, 2L, 5L, 3L, 1L, 7L, 1L, 
8L, 1L, 3L, 4L, 3L, 6L), .Label = c("0", "1", "1,2", "1,2,3", 
"1,2,3,4", "1,2,3,4,5", "3,4,5,6,7", "6,7,8,9,10,11,12,13,14,15,16"
), class = "factor"), M2 = structure(c(5L, 2L, 2L, 4L, 4L, 1L, 
11L, 8L, 7L, 9L, 3L, 6L, 3L, 10L), .Label = c("0", "1,2", "1,2,3", 
"1,2,3,4", "1,2,3,4,5", "1,2,3,4,5,6,7", "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16", 
"2", "2,3,4,5", "4,5,6", "4,5,6,7,8,9,10,11,12,13,14"), class = "factor"), 
    M3 = structure(c(4L, 1L, 1L, 8L, 3L, 1L, 6L, 1L, 7L, 3L, 
    2L, 5L, 9L, 3L), .Label = c("0", "1,2", "1,2,3,4", "1,2,3,4,5", 
    "1,2,3,4,5,6", "1,2,3,4,5,6,7,8", "1,2,3,4,5,6,7,8,9,10,11,12,13,14", 
    "3,4", "3,4,5"), class = "factor"), M4 = structure(c(5L, 
    1L, 2L, 8L, 2L, 1L, 6L, 3L, 4L, 1L, 3L, 3L, 7L, 9L), .Label = c("0", 
    "1", "1,2", "1,2,3,4,5,12,13,14,15,16,17", "1,2,3,4,5,6", 
    "1,2,3,4,5,6,7,8,9,10,11,12", "3,4", "4", "4,5"), class = "factor"), 
    M_NEW = structure(c(6L, 1L, 2L, 8L, 3L, 1L, 9L, 1L, 7L, 1L, 
    3L, 4L, 5L, 10L), .Label = c("0", "1", "1,2", "1,2,3", "1,2,3,", 
    "1,2,3,4,5", "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16", "3,4", 
    "3,4,5,6,7,8", "4,5"), class = "factor")), .Names = c("M1", 
"M2", "M3", "M4", "M_NEW"), class = "data.frame", row.names = c(NA, 
-14L))
# Collect the values that occur in at least `n` of the cells of one row.
#
# Arguments:
#   x  A vector (character or factor) with one element per cell; each
#      element is a string of comma-separated values such as "1,2,3".
#   n  Minimum number of cells a value must occur in to be kept
#      (default 3).
#
# Returns a single string: the qualifying values joined by ",", sorted
# numerically (tokens that are not numbers sort last, alphabetically),
# or "0" when no value qualifies.
f <- function(x, n = 3) {
  cells <- strsplit(as.character(x), ",", fixed = TRUE)
  # Count each value at most once per cell, so that "1,1,1" in a single
  # cell is not mistaken for three occurrences. Trimming guards against
  # padding such as " 1", which would otherwise be a different token.
  tokens <- unlist(lapply(cells, function(cell) unique(trimws(cell))))
  # NA cells and empty fragments (e.g. from a trailing comma) are not values.
  tokens <- tokens[!is.na(tokens) & nzchar(tokens)]
  if (length(tokens) == 0L) {
    return("0")
  }
  counts <- table(tokens)
  keep <- names(counts)[counts >= n]
  if (length(keep) == 0L) {
    return("0")
  }
  # table() orders its names alphabetically ("1", "10", "11", "2", ...);
  # reorder by numeric value so the result reads 1,2,...,10,11.
  numeric_key <- suppressWarnings(as.numeric(keep))
  paste(keep[order(numeric_key, keep)], collapse = ",")
}
# Run f over every row of the first four columns and overwrite column 5
# with the resulting strings. The enclosing parentheses make R print the
# assigned vector when this line is evaluated at top level.
(df[, 5] <- apply(X = df[, 1:4], MARGIN = 1, FUN = f))
# [1] "1,2,3,4,5"                             
# [2] "0"                                     
# [3] "1"                                     
# [4] "3,4"                                   
# [5] "1,2"                                   
# [6] "0"                                     
# [7] "3,4,5,6,7,8"                           
# [8] "0"                                     
# [9] "1,10,11,12,13,14,15,16,2,3,4,5,6,7,8,9"
# [10] "0"                                     
# [11] "1,2"                                   
# [12] "1,2,3"                                 
# [13] "3"                                     
# [14] "4,5"