从R中的最小值创建一个新变量

从R中的最小值创建一个新变量,r,R,数据包含四个字段:id、x1、x2和x3 id <- c(1,2,3,4,5,6,7,8,9,10) x1 <- c(2,4,5,3,6,4,3,6,7,7) x2 <- c(0,1,2,6,7,6,0,8,2,2) x3 <- c(5,3,4,5,8,3,4,2,5,6) DF <- data.frame(id, x1,x2,x3) 我需要创建一个新字段,y,其定义如下 if min(x1,x2,x3) = x1, then y = "x1" if min(x

数据包含四个字段:
id、x1、x2和x3

# Example data: an identifier plus three numeric measurements per row
id <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
x1 <- c(2, 4, 5, 3, 6, 4, 3, 6, 7, 7)
x2 <- c(0, 1, 2, 6, 7, 6, 0, 8, 2, 2)
x3 <- c(5, 3, 4, 5, 8, 3, 4, 2, 5, 6)

# Combine the vectors column-wise; each column is named after its vector
DF <- data.frame(id = id, x1 = x1, x2 = x2, x3 = x3)
我需要创建一个新字段 `y`,其定义如下:

if min(x1,x2,x3) = x1, then y = "x1"
if min(x1,x2,x3) = x2, then y = "x2"
if min(x1,x2,x3) = x3, then y = "x3"

注意:我们假设不存在并列(ties)的情况,即每一行的最小值是唯一的。

作为简单的解决方案,请执行以下操作:

# Names of the candidate columns: everything except the leading id column.
VARS <- colnames(DF)[-1]
# For each row, which.min() gives the position of the smallest value (the
# first one if several are equal). drop = FALSE keeps the selection a data
# frame even when only a single value column is left, so apply() always
# receives a two-dimensional input.
y <- VARS[apply(DF[, -1, drop = FALSE], MARGIN = 1, FUN = which.min)]
DF$y <- y

作为简单的解决方案,请执行以下操作:

# Names of the candidate columns: everything except the leading id column.
VARS <- colnames(DF)[-1]
# For each row, which.min() gives the position of the smallest value (the
# first one if several are equal). drop = FALSE keeps the selection a data
# frame even when only a single value column is left, so apply() always
# receives a two-dimensional input.
y <- VARS[apply(DF[, -1, drop = FALSE], MARGIN = 1, FUN = which.min)]
DF$y <- y

一个 data.table 解决方案:

# Example data: an identifier plus three numeric measurements per row
id <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
x1 <- c(2, 4, 5, 3, 6, 4, 3, 6, 7, 7)
x2 <- c(0, 1, 2, 6, 7, 6, 0, 8, 2, 2)
x3 <- c(5, 3, 4, 5, 8, 3, 4, 2, 5, 6)
DF <- data.frame(id = id, x1 = x1, x2 = x2, x3 = x3)

# Convert DF to a data.table by reference, then add each row's minimum
library(data.table)
setDT(DF)
DF[, minx := apply(.SD, 1, min), .SDcols = c("x1", "x2", "x3")]

# For every row, record the name of the column that holds the minimum
DF[
  ,
  y := apply(.SD, 1, function(row) names(row)[which.min(row)]),
  .SDcols = c("x1", "x2", "x3")
]

# Print the result
DF
##     id x1 x2 x3 minx  y
##  1:  1  2  0  5    0 x2
##  2:  2  4  1  3    1 x2
##  3:  3  5  2  4    2 x2
##  4:  4  3  6  5    3 x1
##  5:  5  6  7  8    6 x1
##  6:  6  4  6  3    3 x3
##  7:  7  3  0  4    0 x2
##  8:  8  6  8  2    2 x3
##  9:  9  7  2  5    2 x2
## 10: 10  7  2  6    2 x2

当应用于 1e6 行的数据框时(在我的索尼笔记本电脑上约为 17 秒),此解决方案所花费的时间与原始答案中提供的
data.table 解决方案大致相同。

一个 data.table 解决方案:

# Example data: an identifier plus three numeric measurements per row
id <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
x1 <- c(2, 4, 5, 3, 6, 4, 3, 6, 7, 7)
x2 <- c(0, 1, 2, 6, 7, 6, 0, 8, 2, 2)
x3 <- c(5, 3, 4, 5, 8, 3, 4, 2, 5, 6)
DF <- data.frame(id = id, x1 = x1, x2 = x2, x3 = x3)

# Convert DF to a data.table by reference, then add each row's minimum
library(data.table)
setDT(DF)
DF[, minx := apply(.SD, 1, min), .SDcols = c("x1", "x2", "x3")]

# For every row, record the name of the column that holds the minimum
DF[
  ,
  y := apply(.SD, 1, function(row) names(row)[which.min(row)]),
  .SDcols = c("x1", "x2", "x3")
]

# Print the result
DF
##     id x1 x2 x3 minx  y
##  1:  1  2  0  5    0 x2
##  2:  2  4  1  3    1 x2
##  3:  3  5  2  4    2 x2
##  4:  4  3  6  5    3 x1
##  5:  5  6  7  8    6 x1
##  6:  6  4  6  3    3 x3
##  7:  7  3  0  4    0 x2
##  8:  8  6  8  2    2 x3
##  9:  9  7  2  5    2 x2
## 10: 10  7  2  6    2 x2

当应用于 1e6 行的数据框时(在我的索尼笔记本电脑上约为 17 秒),此解决方案所花费的时间与原始答案中提供的
data.table 解决方案大致相同。

这里是另一个使用 `pmin` 和 `max.col` 的解决方案:

library(data.table)
# Add both columns in a single pass over x1..x3:
#   minx - the row-wise minimum, computed by pmin() across the columns
#   y    - the name of the column holding that minimum; max.col() on the
#          negated values locates it, and ties.method = "first" makes the
#          choice deterministic (the default, "random", picks among tied
#          columns at random), matching which.min()'s first-minimum rule.
setDT(DF)[
  ,
  c("minx", "y") := list(
    do.call(pmin, .SD),
    names(.SD)[max.col(-1 * .SD, ties.method = "first")]
  ),
  .SDcols = x1:x3
]
DF
#    id x1 x2 x3 minx  y
# 1:  1  2  0  5    0 x2
# 2:  2  4  1  3    1 x2
# 3:  3  5  2  4    2 x2
# 4:  4  3  6  5    3 x1
# 5:  5  6  7  8    6 x1
# 6:  6  4  6  3    3 x3
# 7:  7  3  0  4    0 x2
# 8:  8  6  8  2    2 x3
# 9:  9  7  2  5    2 x2
#10: 10  7  2  6    2 x2

这里是另一个使用 `pmin` 和 `max.col` 的解决方案:

library(data.table)
# Add both columns in a single pass over x1..x3:
#   minx - the row-wise minimum, computed by pmin() across the columns
#   y    - the name of the column holding that minimum; max.col() on the
#          negated values locates it, and ties.method = "first" makes the
#          choice deterministic (the default, "random", picks among tied
#          columns at random), matching which.min()'s first-minimum rule.
setDT(DF)[
  ,
  c("minx", "y") := list(
    do.call(pmin, .SD),
    names(.SD)[max.col(-1 * .SD, ties.method = "first")]
  ),
  .SDcols = x1:x3
]
DF
#    id x1 x2 x3 minx  y
# 1:  1  2  0  5    0 x2
# 2:  2  4  1  3    1 x2
# 3:  3  5  2  4    2 x2
# 4:  4  3  6  5    3 x1
# 5:  5  6  7  8    6 x1
# 6:  6  4  6  3    3 x3
# 7:  7  3  0  4    0 x2
# 8:  8  6  8  2    2 x3
# 9:  9  7  2  5    2 x2
#10: 10  7  2  6    2 x2

您需要去掉ID列,考虑为什么要创建所有这些中间变量,而不仅仅是
DF$y@prolalistatusMaximus需要
which.pmin
和您的data.table解决方案建议在该包中提供。您需要去掉ID列,考虑为什么要创建所有这些中间变量,而不仅仅是
DF$y@拖延症最大化需要一个
which.pmin
和您的数据表解决方案建议在该包中提供。应用函数的替代方案:
# Row-wise alternative to apply(): group by row index so that .SD is a single
# row, then take the name of its smallest column. seq_len() is used instead of
# 1:nrow(DF), which would yield c(1, 0) for an empty table.
DF[, y := names(.SD)[which.min(.SD)], by = seq_len(nrow(DF)), .SDcols = 2:4]
应用函数的替代方案:
# Row-wise alternative to apply(): group by row index so that .SD is a single
# row, then take the name of its smallest column. seq_len() is used instead of
# 1:nrow(DF), which would yield c(1, 0) for an empty table.
DF[, y := names(.SD)[which.min(.SD)], by = seq_len(nrow(DF)), .SDcols = 2:4]
有关类似问题,请参见
?max.col
names(DF[-1])[max.col(-DF[-1], "first")]
有关类似问题,请参见
?max.col
names(DF[-1])[max.col(-DF[-1], "first")]