R 循环或函数以创建新列并基于列表填充
我想编写一个函数或循环,创建三个新列,然后用相同的值或指定的值填充这些列,如果原始列中的值在三个指定列表中的一个内 例如,以下是数据的外观:R 循环或函数以创建新列并基于列表填充,r,list,function,loops,R,List,Function,Loops,我想编写一个函数或循环,创建三个新列,然后用相同的值或指定的值填充这些列,如果原始列中的值在三个指定列表中的一个内 例如,以下是数据的外观: > data a1 a2 a3 1 C C A 2 A B_20 B_20 3 A C B_30 4 C C B_40 5 C A A 6 B_60 B_60 B_60 7 A A C 8 A C B_80 9 B_90 C B_90
> data
a1 a2 a3
1 C C A
2 A B_20 B_20
3 A C B_30
4 C C B_40
5 C A A
6 B_60 B_60 B_60
7 A A C
8 A C B_80
9 B_90 C B_90
我想创建三个新列(a1_t、a2_t、a3_t):如果 a1 的值位于 list1 中
# Members of the first group: B_10, B_20 and B_30.
list1 <- paste0("B_", c(10, 20, 30))
则用 B_00_30 填充 a1_t;如果 a1 的值在 list2(例如 list2 <- c('B_40','B_50','B_60'))中,则用 B_40_60 填充;
或者如果 a1 的值在 list3 中,
则用 B_70_90 填充:
# Members of the third group: B_70, B_80 and B_90.
list3 <- paste0("B_", c(70, 80, 90))
# Recode every "B_<n>" value of `data` into its range label and keep all other
# values ("A", "C", ...) unchanged. The result is stored in `data1`, whose
# columns carry a "_t" suffix.

# Upper bound of each bin: the largest numeric suffix found in each list.
# The lists are named explicitly instead of being collected with
# ls(pattern = "list"), which would also pick up any unrelated object whose
# name merely contains "list".
val <- vapply(
  list(list1 = list1, list2 = list2, list3 = list3),
  function(x) max(as.numeric(gsub("._", "", x))),
  numeric(1)
)
val
# list1 list2 list3
#    30    60    90

# Flatten `data` column by column. Values without a numeric suffix become NA
# in as.numeric(), which emits a coercion warning for them.
values <- as.vector(as.matrix(data))
indx <- cut(
  as.numeric(gsub(".\\_", "", values)),
  breaks = c(0, val),
  labels = FALSE
)

# Put the range label on every binned position and copy the original value
# into exactly those positions that did not fall into any bin, so the two
# replacements can never get out of step.
binned <- !is.na(indx)
indx[binned] <- c("B_00_30", "B_40_60", "B_70_90")[indx[binned]]
indx[!binned] <- values[!binned]

data1 <- data
data1[] <- indx
colnames(data1) <- paste(colnames(data1), "t", sep = "_")
要创建原始数据,请执行以下操作:
# Rebuild the example data: nine rows, three character columns (a1, a2, a3).
data <- data.frame(
  a1 = c("A", "B_20", "B_30", "C", "A", "C", "C", "B_80", "B_90"),
  a2 = c("A", "A", "A", "C", "B_50", "C", "B_70", "C", "C"),
  a3 = c("B_10", "C", "C", "A", "B_50", "A", "A", "B_80", "A"),
  stringsAsFactors = FALSE
)
一种方法可以是:
# Two-column lookup table: the original value (column "list") and the range
# label that replaces it (column "val").
lookup <- rbind(
  cbind(list = list1, val = "B_00_30"),
  cbind(list2, "B_40_60"),
  cbind(list3, "B_70_90")
)

# Recode each column of `data`: values found in the lookup table get their
# range label, everything else is kept as is.
# vapply() pins the result to one character vector of nrow(data) elements per
# column, so the result has a predictable shape even for a zero-row `data`
# (sapply() would hand back a list in that case).
vapply(
  data,
  function(x) {
    x <- as.character(x) # factor columns would otherwise leak integer codes
    tmp <- lookup[, 2][match(x, lookup[, 1])]
    unmatched <- is.na(tmp)
    tmp[unmatched] <- x[unmatched]
    tmp
  },
  character(nrow(data))
)
# a1 a2 a3
# [1,] "A" "A" "B_00_30"
# [2,] "B_00_30" "A" "C"
# [3,] "B_00_30" "A" "C"
# [4,] "C" "C" "A"
# [5,] "A" "B_40_60" "B_40_60"
# [6,] "C" "C" "A"
# [7,] "C" "B_70_90" "A"
# [8,] "B_70_90" "C" "B_70_90"
# [9,] "B_70_90" "C" "A"
然后,您可以根据需要将结果 cbind 到 “data”,
并强制转换为 “data.frame”。另一种方法是使用 cut:
# Bin the numeric suffix of every value of `data` into (0,30], (30,60] and
# (60,90] and keep the bin number (1-3). Values without a numeric suffix
# ("A", "C") turn into NA; as.numeric() emits a coercion warning for them.
indx <- cut(
  as.numeric(gsub(".\\_", "", as.matrix(data))),
  breaks = c(0, 30, 60, 90),
  # FALSE is spelled out: `F` is an ordinary variable and can be reassigned.
  labels = FALSE
)
更新
要避免出现警告消息,可以执行以下操作:
# Same recoding as the plain cut() approach, but only the cells that actually
# contain a digit are converted, so as.numeric() has nothing to warn about.
m1 <- as.matrix(data)
suffix <- gsub(".\\_", "", m1)

# TRUE for every cell whose suffix contains a digit.
indx <- grepl("\\d", suffix)
indx1 <- cut(
  as.numeric(suffix[indx]),
  breaks = c(0, 30, 60, 90),
  labels = FALSE
)

# Only overwrite cells that fell into a bin; a numeric suffix outside (0, 90]
# keeps its original value instead of being turned into NA.
in_bin <- !is.na(indx1)
m1[which(indx)[in_bin]] <- c("B_00_30", "B_40_60", "B_70_90")[indx1[in_bin]]

data1 <- data
data1[] <- m1
colnames(data1) <- paste(colnames(data1), "t", sep = "_")
cbind(data, data1)
# a1 a2 a3 a1_t a2_t a3_t
# 1 A A B_10 A A B_00_30
# 2 B_20 A C B_00_30 A C
# 3 B_30 A C B_00_30 A C
# 4 C C A C C A
# 5 A B_50 B_50 A B_40_60 B_40_60
# 6 C C A C C A
# 7 C B_70 A C B_70_90 A
# 8 B_80 C B_80 B_70_90 C B_70_90
# 9 B_90 C A B_70_90 C A
对于查找表,
您也可以使用 melt(setNames(list(list1, list2, list3), c("B_00_30", "B_40_60", "B_70_90")))
谢谢 @alexis_laz,这个方法效果很好,没有像我运行 akrun 的代码时那样产生任何警告消息。我已经编辑了我的问题,补充了重命名列、强制转换列以及 cbind 的内容。关于警告消息(抱歉,我本应提到它):我利用了 as.numeric
会把所有非数字元素转换为 NA 的特性,这自然会触发警告。您可以使用 suppressWarnings
或更新后的脚本(如果需要)来避免警告。
# Two-column lookup table: the original value (column "list") and the range
# label that replaces it (column "val").
lookup <- rbind(
  cbind(list = list1, val = "B_00_30"),
  cbind(list2, "B_40_60"),
  cbind(list3, "B_70_90")
)

# Recode each column of `data`: values found in the lookup table get their
# range label, everything else is kept as is.
# vapply() pins the result to one character vector of nrow(data) elements per
# column, so the result has a predictable shape even for a zero-row `data`
# (sapply() would hand back a list in that case).
vapply(
  data,
  function(x) {
    x <- as.character(x) # factor columns would otherwise leak integer codes
    tmp <- lookup[, 2][match(x, lookup[, 1])]
    unmatched <- is.na(tmp)
    tmp[unmatched] <- x[unmatched]
    tmp
  },
  character(nrow(data))
)
# a1 a2 a3
# [1,] "A" "A" "B_00_30"
# [2,] "B_00_30" "A" "C"
# [3,] "B_00_30" "A" "C"
# [4,] "C" "C" "A"
# [5,] "A" "B_40_60" "B_40_60"
# [6,] "C" "C" "A"
# [7,] "C" "B_70_90" "A"
# [8,] "B_70_90" "C" "B_70_90"
# [9,] "B_70_90" "C" "A"
# Recode every "B_<n>" value of `data` into its range label and keep all other
# values ("A", "C", ...) unchanged. The result is stored in `data1`, whose
# columns carry a "_t" suffix.

# Flatten `data` column by column and bin the numeric suffixes. Values without
# a numeric suffix become NA; as.numeric() emits a coercion warning for them.
values <- as.vector(as.matrix(data))
indx <- cut(
  as.numeric(gsub(".\\_", "", values)),
  breaks = c(0, 30, 60, 90),
  labels = FALSE
)

# Alternative to the hard-coded breaks: take the largest numeric suffix of
# each list as the upper bound of its bin. The lists are named explicitly
# instead of being collected with ls(pattern = "list"), which would also pick
# up any unrelated object whose name merely contains "list".
val <- vapply(
  list(list1 = list1, list2 = list2, list3 = list3),
  function(x) max(as.numeric(gsub("._", "", x))),
  numeric(1)
)
val
# list1 list2 list3
#    30    60    90
# indx <- cut(as.numeric(gsub(".\\_", "", values)),
#             breaks = c(0, val), labels = FALSE)

# Put the range label on every binned position and copy the original value
# into exactly those positions that did not fall into any bin, so the two
# replacements can never get out of step.
binned <- !is.na(indx)
indx[binned] <- c("B_00_30", "B_40_60", "B_70_90")[indx[binned]]
indx[!binned] <- values[!binned]

data1 <- data
data1[] <- indx
colnames(data1) <- paste(colnames(data1), "t", sep = "_")
# Same recoding as the plain cut() approach, but only the cells that actually
# contain a digit are converted, so as.numeric() has nothing to warn about.
m1 <- as.matrix(data)
suffix <- gsub(".\\_", "", m1)

# TRUE for every cell whose suffix contains a digit.
indx <- grepl("\\d", suffix)
indx1 <- cut(
  as.numeric(suffix[indx]),
  breaks = c(0, 30, 60, 90),
  labels = FALSE
)

# Only overwrite cells that fell into a bin; a numeric suffix outside (0, 90]
# keeps its original value instead of being turned into NA.
in_bin <- !is.na(indx1)
m1[which(indx)[in_bin]] <- c("B_00_30", "B_40_60", "B_70_90")[indx1[in_bin]]

data1 <- data
data1[] <- m1
colnames(data1) <- paste(colnames(data1), "t", sep = "_")
cbind(data, data1)
# a1 a2 a3 a1_t a2_t a3_t
# 1 A A B_10 A A B_00_30
# 2 B_20 A C B_00_30 A C
# 3 B_30 A C B_00_30 A C
# 4 C C A C C A
# 5 A B_50 B_50 A B_40_60 B_40_60
# 6 C C A C C A
# 7 C B_70 A C B_70_90 A
# 8 B_80 C B_80 B_70_90 C B_70_90
# 9 B_90 C A B_70_90 C A