R 循环或函数以创建新列并基于列表填充

R 循环或函数以创建新列并基于列表填充,r,list,function,loops,R,List,Function,Loops,我想编写一个函数或循环,创建三个新列,然后用相同的值或指定的值填充这些列,如果原始列中的值在三个指定列表中的一个内 例如,以下是数据的外观: > data a1 a2 a3 1 C C A 2 A B_20 B_20 3 A C B_30 4 C C B_40 5 C A A 6 B_60 B_60 B_60 7 A A C 8 A C B_80 9 B_90 C B_90

我想编写一个函数或循环,创建三个新列,然后用相同的值或指定的值填充这些列,如果原始列中的值在三个指定列表中的一个内

例如,以下是数据的外观:

> data
    a1   a2   a3
1    C    C    A
2    A B_20 B_20
3    A    C B_30
4    C    C B_40
5    C    A    A
6 B_60 B_60 B_60
7    A    A    C
8    A    C B_80
9 B_90    C B_90
我想创建三个新列(分别对应 a1、a2、a3),其中如果 a1 位于 list1 中:

# Codes that belong to the first group (numeric suffixes 10, 20, 30).
list1 <- paste0("B_", c(10, 20, 30))
然后用 B_00_30 填充;如果 a1 位于 list2 中,则用 B_40_60 填充;

或者如果 a1 位于 list3 中(则用 B_70_90 填充):

# Codes that belong to the third group (numeric suffixes 70, 80, 90).
list3 <- paste0("B_", c(70, 80, 90))
 # Upper bound of each group: the largest numeric suffix found in each of the
 # vectors named list1, list2, ... in the current environment. The anchored
 # pattern keeps unrelated objects whose name merely contains "list" out.
 val <- vapply(
   mget(ls(pattern = "^list[0-9]+$")),
   function(x) max(as.numeric(gsub("._", "", x))),
   numeric(1)
 )
 val
 # list1 list2 list3 
 # 30    60    90 

 # Bin every cell by its numeric suffix. Cells without one ("A", "C") become
 # NA; as.numeric() emits a coercion warning for them, which is expected here.
 cells <- as.matrix(data)
 indx <- cut(
   as.numeric(gsub(".\\_", "", cells)),
   breaks = c(0, val),
   labels = FALSE
 )
 matched <- !is.na(indx)
 indx[matched] <- c("B_00_30", "B_40_60", "B_70_90")[indx[matched]]
 # Unmatched cells keep their original value. Filling by position (rather than
 # by counting cells without "_") stays aligned even when a suffixed value
 # falls outside the breaks.
 indx[!matched] <- cells[!matched]

 # Same shape as `data`, recoded values, column names suffixed with "_t".
 data1 <- data
 data1[] <- indx
 colnames(data1) <- paste(colnames(data1), "t", sep = "_")
要创建原始数据,请执行以下操作:

# Example input: nine rows and three character columns (a1, a2, a3).
data <- data.frame(
  a1 = c("A", "B_20", "B_30", "C", "A", "C", "C", "B_80", "B_90"),
  a2 = c("A", "A", "A", "C", "B_50", "C", "B_70", "C", "C"),
  a3 = c("B_10", "C", "C", "A", "B_50", "A", "A", "B_80", "A"),
  stringsAsFactors = FALSE
)
数据一种方法可以是:

# Two-column character lookup table: each original code (column "list") next
# to the range label that replaces it (column "val"). rbind() takes the column
# names from the first cbind(), which is the only one that names them.
lookup <- rbind(
  cbind(list = list1, val = "B_00_30"),
  cbind(list2, "B_40_60"),
  cbind(list3, "B_70_90")
)
# Recode every column of `data`; values not found in the lookup are kept.
# vapply() fixes the result to a character matrix with one column per input
# column, instead of letting sapply() choose the return type.
vapply(
  data,
  function(x) {
    tmp <- lookup[, "val"][match(x, lookup[, "list"])]
    ifelse(is.na(tmp), x, tmp)
  },
  character(nrow(data))
)
#      a1        a2        a3       
# [1,] "A"       "A"       "B_00_30"
# [2,] "B_00_30" "A"       "C"      
# [3,] "B_00_30" "A"       "C"      
# [4,] "C"       "C"       "A"      
# [5,] "A"       "B_40_60" "B_40_60"
# [6,] "C"       "C"       "A"      
# [7,] "C"       "B_70_90" "A"      
# [8,] "B_70_90" "C"       "B_70_90"
# [9,] "B_70_90" "C"       "A" 

然后,您可以根据需要
cbind
到“data”并强制到“data.frame”。

另一种方法是使用
cut

 # Bin every cell by its numeric suffix into (0,30], (30,60], (60,90], giving
 # integer codes 1..3. Cells without a numeric suffix become NA, and
 # as.numeric() warns about that coercion.
 indx <- cut(
   as.numeric(gsub(".\\_", "", as.matrix(data))),
   breaks = c(0, 30, 60, 90),
   labels = FALSE
 )
更新 要避免出现警告消息,可以执行以下操作:

 # Work on a character matrix so every cell can be addressed at once.
 m1 <- as.matrix(data)
 # TRUE for cells that still contain a digit once the "<char>_" prefix is
 # stripped (e.g. "B_20"); plain letters such as "A" are never passed to
 # as.numeric(), so they cause no coercion warning.
 indx <- grepl("\\d", gsub(".\\_", "", m1))
 indx1 <- cut(
   as.numeric(gsub(".\\_", "", m1[indx])),
   breaks = c(0, 30, 60, 90),
   labels = FALSE
 )
 # Only relabel values that fell inside the breaks; anything outside keeps its
 # original value instead of being overwritten with NA.
 in_range <- !is.na(indx1)
 m1[indx][in_range] <- c("B_00_30", "B_40_60", "B_70_90")[indx1[in_range]]
 # Same shape as `data`, recoded values, column names suffixed with "_t".
 data1 <- data
 data1[] <- m1
 colnames(data1) <- paste(colnames(data1), "t", sep = "_")
 cbind(data, data1)
 #       a1   a2   a3    a1_t    a2_t    a3_t
 #  1    A    A B_10       A       A B_00_30
 #  2 B_20    A    C B_00_30       A       C
 #  3 B_30    A    C B_00_30       A       C
 #  4    C    C    A       C       C       A
 #  5    A B_50 B_50       A B_40_60 B_40_60
 #  6    C    C    A       C       C       A
 #  7    C B_70    A       C B_70_90       A
 #  8 B_80    C B_80 B_70_90       C B_70_90
 #  9 B_90    C    A B_70_90       C       A

m1对于
查找表 lookup,您也可以使用
melt(setNames(list(list1, list2, list3), c("B_00_30", "B_40_60", "B_70_90")))
谢谢 @alexis_laz,这工作得很好,没有像我运行 akrun 的代码时那样产生任何警告消息。我已经编辑了我的问题,加入了重命名列、强制转换列以及 cbind 的步骤。
关于警告消息(我应该提到它,对不起):我利用了 as.numeric 会把所有非数字元素转换为 NA 的特性,这当然会触发警告。您可以使用 suppressWarnings 或更新后的脚本(如果需要)来避免警告。
# Two-column character lookup table: each original code (column "list") next
# to the range label that replaces it (column "val"). rbind() takes the column
# names from the first cbind(), which is the only one that names them.
lookup <- rbind(
  cbind(list = list1, val = "B_00_30"),
  cbind(list2, "B_40_60"),
  cbind(list3, "B_70_90")
)
# Recode every column of `data`; values not found in the lookup are kept.
# vapply() fixes the result to a character matrix with one column per input
# column, instead of letting sapply() choose the return type.
vapply(
  data,
  function(x) {
    tmp <- lookup[, "val"][match(x, lookup[, "list"])]
    ifelse(is.na(tmp), x, tmp)
  },
  character(nrow(data))
)
#      a1        a2        a3       
# [1,] "A"       "A"       "B_00_30"
# [2,] "B_00_30" "A"       "C"      
# [3,] "B_00_30" "A"       "C"      
# [4,] "C"       "C"       "A"      
# [5,] "A"       "B_40_60" "B_40_60"
# [6,] "C"       "C"       "A"      
# [7,] "C"       "B_70_90" "A"      
# [8,] "B_70_90" "C"       "B_70_90"
# [9,] "B_70_90" "C"       "A" 
 # Bin every cell by its numeric suffix into (0,30], (30,60], (60,90]. Cells
 # without a numeric suffix ("A", "C") become NA; as.numeric() emits a
 # coercion warning for them, which is expected here.
 cells <- as.matrix(data)
 indx <- cut(
   as.numeric(gsub(".\\_", "", cells)),
   breaks = c(0, 30, 60, 90),
   labels = FALSE
 )
 # Alternative source for the breaks: the largest numeric suffix in each of
 # the vectors named list1, list2, ... The anchored pattern keeps unrelated
 # objects whose name merely contains "list" out.
 val <- vapply(
   mget(ls(pattern = "^list[0-9]+$")),
   function(x) max(as.numeric(gsub("._", "", x))),
   numeric(1)
 )
 val
 # list1 list2 list3 
 # 30    60    90 

 # indx <- cut(as.numeric(gsub(".\\_", "", cells)), breaks = c(0, val), labels = FALSE)
 matched <- !is.na(indx)
 indx[matched] <- c("B_00_30", "B_40_60", "B_70_90")[indx[matched]]
 # Unmatched cells keep their original value. Filling by position (rather than
 # by counting cells without "_") stays aligned even when a suffixed value
 # falls outside the breaks.
 indx[!matched] <- cells[!matched]

 # Same shape as `data`, recoded values, column names suffixed with "_t".
 data1 <- data
 data1[] <- indx
 colnames(data1) <- paste(colnames(data1), "t", sep = "_")
 # Work on a character matrix so every cell can be addressed at once.
 m1 <- as.matrix(data)
 # TRUE for cells that still contain a digit once the "<char>_" prefix is
 # stripped (e.g. "B_20"); plain letters such as "A" are never passed to
 # as.numeric(), so they cause no coercion warning.
 indx <- grepl("\\d", gsub(".\\_", "", m1))
 indx1 <- cut(
   as.numeric(gsub(".\\_", "", m1[indx])),
   breaks = c(0, 30, 60, 90),
   labels = FALSE
 )
 # Only relabel values that fell inside the breaks; anything outside keeps its
 # original value instead of being overwritten with NA.
 in_range <- !is.na(indx1)
 m1[indx][in_range] <- c("B_00_30", "B_40_60", "B_70_90")[indx1[in_range]]
 # Same shape as `data`, recoded values, column names suffixed with "_t".
 data1 <- data
 data1[] <- m1
 colnames(data1) <- paste(colnames(data1), "t", sep = "_")
 cbind(data, data1)
 #       a1   a2   a3    a1_t    a2_t    a3_t
 #  1    A    A B_10       A       A B_00_30
 #  2 B_20    A    C B_00_30       A       C
 #  3 B_30    A    C B_00_30       A       C
 #  4    C    C    A       C       C       A
 #  5    A B_50 B_50       A B_40_60 B_40_60
 #  6    C    C    A       C       C       A
 #  7    C B_70    A       C B_70_90       A
 #  8 B_80    C B_80 B_70_90       C B_70_90
 #  9 B_90    C    A B_70_90       C       A