在R中选择数据表中的列

在R中选择数据表中的列,r,data.table,R,Data.table,我有如下所示的“in_table”。我需要使用“Comb_table”获取“Table1”、“Table2”、“Table3”等。基本上,当Comb_table中某个变量的值为1时,我需要将该变量包含在分组列表中。有没有什么有效的方法可以用R语言实现,而不必手动输入所有的组合?感谢您的帮助,谢谢。in_table: POL Var1 Var2 Var3 Var4 Var5 Var6 Var7 8035 1 11 1 GRD 0030 0110 09

我有如下所示的
“in_table”
。我需要使用
“Comb_table”
获取
“Table1”、“Table2”、“Table3”等。基本上,当Comb_table中某个变量的值为1时,我需要将该变量包含在分组列表中

有没有什么有效的方法可以用R语言而不是手动输入所有的组合

感谢您的帮助

谢谢

in_table:

POL    Var1  Var2  Var3  Var4  Var5    Var6    Var7 
8035   1     11    1     GRD   0030    0110    09/30
36763  1     88    13    GRD   5260    0300    11/15
36763  1     88    13    GRD   5280    0300    11/15
35786  1     88    13    GRD   0030    0110    09/30


Comb_table:
        Var1  Var2  Var3  Var4  Var5  Var6  Var7
 Table1   1     1   1     1     1     1     1
 Table2   0     1   1     1     1     1     1
 Table3   1     0   1     1     1     1     1


# Table1: number of distinct POL values for every combination of Var1..Var7.
Table1 <- in_table[,
  .(Pol_count = uniqueN(POL)),
  by = c("Var1", "Var2", "Var3", "Var4", "Var5", "Var6", "Var7")
]

# Table2: same count, grouping on every variable except Var1.
Table2 <- in_table[,
  .(Pol_count = uniqueN(POL)),
  by = c("Var2", "Var3", "Var4", "Var5", "Var6", "Var7")
]

# Table3: same count, grouping on every variable except Var2.
Table3 <- in_table[,
  .(Pol_count = uniqueN(POL)),
  by = c("Var1", "Var3", "Var4", "Var5", "Var6", "Var7")
]

and so on. 
in_table:
POL    Var1  Var2  Var3  Var4  Var5    Var6    Var7
8035   1     11    1     GRD   0030    0110    09/30
36763  1     88    13    GRD   5260    0300    11/15
36763  1     88    13    GRD   5280    0300    11/15
35786  1     88    13    GRD   0030    0110    09/30
Comb_table:
        Var1  Var2  Var3  Var4  Var5  Var6  Var7
Table1   1     1     1     1     1     1     1
Table2   0     1     1     1     1     1     1
Table3   1     0     1     1     1     1     1
表1可能是这样的:

创建一个变量名列表:Comb_table 中值为 `1` 的位置保留对应的变量名,值为 `0` 的位置替换为 `NA`。

# For every variable (column of Comb_table) build a character vector with one
# entry per table: the variable's own name where the flag is 1, NA otherwise.
# Flags are compared through their character form.
name_rows <- Map(
  function(flags, var_name) {
    picked <- rep(NA_character_, length(flags))
    picked[as.character(flags) %in% "1"] <- var_name
    picked
  },
  Comb_table,
  names(Comb_table)
)

# Stack the per-variable vectors as rows, so each column of nm_list describes
# one table (columns are auto-named X1, X2, ...).
nm_list <- data.frame(do.call(rbind, name_rows), stringsAsFactors = FALSE)
nm_list
#        X1   X2   X3
# Var1 Var1 <NA> Var1
# Var2 Var2 Var2 <NA>
# Var3 Var3 Var3 Var3
# Var4 Var4 Var4 Var4
# Var5 Var5 Var5 Var5
# Var6 Var6 Var6 Var6
# Var7 Var7 Var7 Var7

library(data.table)
setDT(in_table)  # turn the data frame into a data.table in place (no copy)

# One result per column of nm_list: count the distinct POL values within each
# combination of the variables that column keeps.
lapply(nm_list, function(group_vars) {
  group_vars <- group_vars[!is.na(group_vars)]  # drop the excluded variables
  # a character vector held in a variable is accepted by `by` as column names
  in_table[, .(Pol_count = uniqueN(POL)), by = group_vars]
})

# $X1
#    Var1 Var2 Var3 Var4 Var5 Var6  Var7 Pol_count
# 1:    1   11    1  GRD   30  110 09/30         1
# 2:    1   88   13  GRD 5260  300 11/15         1
# 3:    1   88   13  GRD 5280  300 11/15         1
# 4:    1   88   13  GRD   30  110 09/30         1
# 
# $X2
#    Var2 Var3 Var4 Var5 Var6  Var7 Pol_count
# 1:   11    1  GRD   30  110 09/30         1
# 2:   88   13  GRD 5260  300 11/15         1
# 3:   88   13  GRD 5280  300 11/15         1
# 4:   88   13  GRD   30  110 09/30         1
# 
# $X3
#    Var1 Var3 Var4 Var5 Var6  Var7 Pol_count
# 1:    1    1  GRD   30  110 09/30         1
# 2:    1   13  GRD 5260  300 11/15         1
# 3:    1   13  GRD 5280  300 11/15         1
# 4:    1   13  GRD   30  110 09/30         1
`nm_list`。另一种可行的写法:

# For each table label in comb_table, count the distinct POL values of
# in_table grouped by the variables flagged with 1 for that label.
# Expects comb_table and in_table to be data.tables (call setDT() on them
# first) and comb_table to hold the table label in a column named "tab".
library(magrittr)

# Long format: one row per (tab, variable) pair; keep only the flagged pairs.
melt(comb_table, id.vars = "tab", variable.factor = FALSE)[value == 1] %>%
  # One chunk per table label, in order of first appearance.
  split(by = "tab") %>%
  # The c() wrapper lets `by` treat the character vector as column names.
  lapply(function(z) in_table[, .(n = uniqueN(POL)), by = c(z$variable)])

# $Table1
#    Var1 Var2 Var3 Var4 Var5 Var6  Var7 n
# 1:    1   11    1  GRD   30  110 09/30 1
# 2:    1   88   13  GRD 5260  300 11/15 1
# 3:    1   88   13  GRD 5280  300 11/15 1
# 4:    1   88   13  GRD   30  110 09/30 1
#
# $Table3
#    Var1 Var3 Var4 Var5 Var6  Var7 n
# 1:    1    1  GRD   30  110 09/30 1
# 2:    1   13  GRD 5260  300 11/15 1
# 3:    1   13  GRD 5280  300 11/15 1
# 4:    1   13  GRD   30  110 09/30 1
#
# $Table2
#    Var2 Var3 Var4 Var5 Var6  Var7 n
# 1:   11    1  GRD   30  110 09/30 1
# 2:   88   13  GRD 5260  300 11/15 1
# 3:   88   13  GRD 5280  300 11/15 1
# 4:   88   13  GRD   30  110 09/30 1
这里使用magrittr只是为了方便

或者,如果您可以接受把所有结果放在同一个表中,并且使用的是 data.table >= 1.10.5,那么可以借助分组集(`groupingsets`),写法大致如下(我没有测试过……):

# Single-table alternative: aggregate in_table once per grouping set, where
# each set is the vector of variables flagged with 1 for one table label.
# Expects comb_table and in_table to be data.tables.
flagged <- melt(comb_table, id.vars = "tab", variable.factor = FALSE)[value == 1]
sets <- split(flagged$variable, flagged$tab)

# groupingsets() requires both `j` (the aggregate) and `by` (a character
# vector covering every column used in any set). Variables left out of a set
# come back as NA; id = TRUE adds a `grouping` column identifying the set.
groupingsets(
  in_table,
  j = .(n = uniqueN(POL)),
  by = setdiff(names(comb_table), "tab"),
  sets = unname(sets),
  id = TRUE
)
使用的数据:我决定OP的行名是/应该是一个名为“tab”的列

# For every row of comb_table (one per table label in `tab`), aggregate
# in_table by the variables whose flag is 1 and store the resulting
# data.table in a list column (auto-named V1).
res <- comb_table[
  ,
  .(list(in_table[, uniqueN(POL), by = c(names(.SD)[.SD == 1])])),
  by = tab
]
#      tab           V1
#1: Table1 <data.table>
#2: Table2 <data.table>
#3: Table3 <data.table>

res$V1
#[[1]]
#   Var1 Var2 Var3 Var4 Var5 Var6  Var7 V1
#1:    1   11    1  GRD   30  110 09/30  1
#2:    1   88   13  GRD 5260  300 11/15  1
#3:    1   88   13  GRD 5280  300 11/15  1
#4:    1   88   13  GRD   30  110 09/30  1
#
#[[2]]
#   Var2 Var3 Var4 Var5 Var6  Var7 V1
#1:   11    1  GRD   30  110 09/30  1
#2:   88   13  GRD 5260  300 11/15  1
#3:   88   13  GRD 5280  300 11/15  1
#4:   88   13  GRD   30  110 09/30  1
#
#[[3]]
#   Var1 Var3 Var4 Var5 Var6  Var7 V1
#1:    1    1  GRD   30  110 09/30  1
#2:    1   13  GRD 5260  300 11/15  1
#3:    1   13  GRD 5280  300 11/15  1
#4:    1   13  GRD   30  110 09/30  1

可参阅 `?uniqueN` 和 `?groupingsets`。下面是后者的一个例子:
# Grouping-sets example: aggregate in_table once per set, where each set is
# the vector of variables flagged with 1 for one table label in comb_table.
# Expects comb_table and in_table to be data.tables.
flagged <- melt(comb_table, id.vars = "tab", variable.factor = FALSE)[value == 1]
sets <- split(flagged$variable, flagged$tab)

# groupingsets() requires both `j` (the aggregate) and `by` (a character
# vector covering every column used in any set). Variables left out of a set
# come back as NA; id = TRUE adds a `grouping` column identifying the set.
groupingsets(
  in_table,
  j = .(n = uniqueN(POL)),
  by = setdiff(names(comb_table), "tab"),
  sets = unname(sets),
  id = TRUE
)
# Reproducible example data (same values as the dput() dumps of both tables).
# These are plain data frames; call data.table::setDT() on each of them
# before running the data.table snippets.

# One row per output table; a 1 means the variable is a grouping column.
comb_table <- data.frame(
  tab = c("Table1", "Table2", "Table3"),
  Var1 = c(1L, 0L, 1L),
  Var2 = c(1L, 1L, 0L),
  Var3 = c(1L, 1L, 1L),
  Var4 = c(1L, 1L, 1L),
  Var5 = c(1L, 1L, 1L),
  Var6 = c(1L, 1L, 1L),
  Var7 = c(1L, 1L, 1L),
  stringsAsFactors = FALSE
)

# Input records: POL plus the seven candidate grouping variables.
in_table <- data.frame(
  POL = c(8035L, 36763L, 36763L, 35786L),
  Var1 = c(1L, 1L, 1L, 1L),
  Var2 = c(11L, 88L, 88L, 88L),
  Var3 = c(1L, 13L, 13L, 13L),
  Var4 = c("GRD", "GRD", "GRD", "GRD"),
  Var5 = c(30L, 5260L, 5280L, 30L),
  Var6 = c(110L, 300L, 300L, 110L),
  Var7 = c("09/30", "11/15", "11/15", "09/30"),
  stringsAsFactors = FALSE
)
# Per table label in `tab`: pick the variables flagged with 1 in that row of
# comb_table, count distinct POL values of in_table by those variables, and
# keep each resulting data.table as an element of a list column (V1).
res <- comb_table[
  ,
  list(list(
    in_table[, uniqueN(POL), by = c(names(.SD)[unlist(.SD) == 1])]
  )),
  by = tab
]
#      tab           V1
#1: Table1 <data.table>
#2: Table2 <data.table>
#3: Table3 <data.table>

res$V1
#[[1]]
#   Var1 Var2 Var3 Var4 Var5 Var6  Var7 V1
#1:    1   11    1  GRD   30  110 09/30  1
#2:    1   88   13  GRD 5260  300 11/15  1
#3:    1   88   13  GRD 5280  300 11/15  1
#4:    1   88   13  GRD   30  110 09/30  1
#
#[[2]]
#   Var2 Var3 Var4 Var5 Var6  Var7 V1
#1:   11    1  GRD   30  110 09/30  1
#2:   88   13  GRD 5260  300 11/15  1
#3:   88   13  GRD 5280  300 11/15  1
#4:   88   13  GRD   30  110 09/30  1
#
#[[3]]
#   Var1 Var3 Var4 Var5 Var6  Var7 V1
#1:    1    1  GRD   30  110 09/30  1
#2:    1   13  GRD 5260  300 11/15  1
#3:    1   13  GRD 5280  300 11/15  1
#4:    1   13  GRD   30  110 09/30  1