在dataframe中添加每行最大出现次数的列
标签:r, dataframe
我有以下数据框:
> example
var1 var2 var3
1 a b a
2 b b c
3 d e d
4 g g g
我想添加四个新列:
每行中出现次数最多的值(max_res)及其出现次数(max_num),以及出现次数最少的值(min_res)及其出现次数(min_num)。
结果将是:
> example
var1 var2 var3 max_res max_num min_res min_num
1 a b a a 2 b 1
2 b b c b 2 c 1
3 d e d d 2 e 1
4 g g g g 3 0
所有值都是一个字母的示例,但它们可以是数字,也可以超过一个字母。
我该怎么做呢?
我想不出一个高效的方法来实现这一点,所以这里给出一个低效的方法。先定义辅助函数:
# Summarise the most and least frequent values of a vector.
#
# Args:
#   x: an atomic vector, e.g. one row of a data frame as handed over by
#      apply(). NA entries are ignored because table() drops them by default.
#
# Returns:
#   A one-row data frame with four columns:
#     max_res  most frequent value (as text)
#     max_num  how often max_res occurs
#     min_res  least frequent value (as text), or "" when `x` has fewer than
#              two distinct values
#     min_num  how often min_res occurs, or 0 when `x` has fewer than two
#              distinct values
#   Any number of distinct values is supported.
myFunc <- function(x) {
  counts <- table(x)
  # Read names and counts straight from the table instead of going through
  # as.data.frame(), whose layout depends on whether the class is kept.
  # order() leaves ties in table()'s sorted order, so the pick is deterministic.
  ranking <- order(as.integer(counts), decreasing = TRUE)
  values <- names(counts)[ranking]
  freqs <- as.integer(counts)[ranking]
  k <- length(values)

  res <- data.frame(
    max_res = "", max_num = 0L, min_res = "", min_num = 0L,
    stringsAsFactors = FALSE
  )
  if (k >= 1L) {
    res$max_res <- values[1L]
    res$max_num <- freqs[1L]
  }
  # Only report a minimum when it is a different value from the maximum.
  if (k >= 2L) {
    res$min_res <- values[k]
    res$min_num <- freqs[k]
  }
  res
}
然后对数据框的每一行调用 myFunc,并用 do.call(rbind, ...) 把结果合并后作为新列加入。
另一个回答:下面的做法似乎更快:
# Append per-row frequency summaries to a data frame.
#
# For every row, finds the value that occurs most often and the value that
# occurs least often, together with their counts. Ties go to the value that
# appears first in the row. A row whose entries are all identical reports
# that value as both the maximum and the minimum.
#
# Args:
#   df: a data frame (or matrix); each row is examined independently.
#
# Returns:
#   A data frame with the original columns followed by `max_res`, `max_num`,
#   `min_res` and `min_num`. apply() works on a matrix, so character input
#   yields character columns throughout, including the counts.
f <- function(df) {
  X <- as.data.frame(t(apply(df, 1, function(row) {
    u <- unique(row)
    # Count with match()/tabulate() rather than outer(u, row, "=="): `==`
    # returns NA for missing entries, which made every count NA and broke
    # the row. match() pairs NA with NA, so NA is counted like any value.
    n <- as.numeric(tabulate(match(row, u), nbins = length(u)))
    c(row, u[which.max(n)], max(n), u[which.min(n)], min(n))
  })))
  colnames(X) <- c(colnames(df), "max_res", "max_num", "min_res", "min_num")
  X
}
# Append per-row frequency summaries to a data frame, leaving the minimum
# blank for rows that hold a single distinct value.
#
# For every row, finds the value that occurs most often and the value that
# occurs least often, together with their counts. Ties go to the value that
# appears first in the row. When all entries of a row are identical there is
# no separate minimum, so `min_res` is "" and `min_num` is 0.
#
# Args:
#   df: a data frame (or matrix); each row is examined independently.
#
# Returns:
#   A data frame with the original columns followed by `max_res`, `max_num`,
#   `min_res` and `min_num`. apply() works on a matrix, so character input
#   yields character columns throughout, including the counts.
g <- function(df) {
  X <- as.data.frame(t(apply(df, 1, function(row) {
    u <- unique(row)
    # Count with match()/tabulate() rather than outer(u, row, "=="): `==`
    # returns NA for missing entries, which made every count NA and broke
    # the row. match() pairs NA with NA, so NA is counted like any value.
    n <- as.numeric(tabulate(match(row, u), nbins = length(u)))
    if (length(u) == 1) {
      # Single distinct value: report an empty minimum instead of repeating it.
      c(row, u[which.max(n)], max(n), "", 0)
    } else {
      c(row, u[which.max(n)], max(n), u[which.min(n)], min(n))
    }
  })))
  colnames(X) <- c(colnames(df), "max_res", "max_num", "min_res", "min_num")
  X
}
# Append per-row frequency summaries to a data frame.
#
# For every row, finds the value that occurs most often and the value that
# occurs least often, together with their counts. Ties go to the value that
# appears first in the row. A row whose entries are all identical reports
# that value as both the maximum and the minimum.
#
# Args:
#   df: a data frame (or matrix); each row is examined independently.
#
# Returns:
#   A data frame with the original columns followed by `max_res`, `max_num`,
#   `min_res` and `min_num`. apply() works on a matrix, so character input
#   yields character columns throughout, including the counts.
f <- function(df) {
  X <- as.data.frame(t(apply(df, 1, function(row) {
    u <- unique(row)
    # Count with match()/tabulate() rather than outer(u, row, "=="): `==`
    # returns NA for missing entries, which made every count NA and broke
    # the row. match() pairs NA with NA, so NA is counted like any value.
    n <- as.numeric(tabulate(match(row, u), nbins = length(u)))
    c(row, u[which.max(n)], max(n), u[which.min(n)], min(n))
  })))
  colnames(X) <- c(colnames(df), "max_res", "max_num", "min_res", "min_num")
  X
}
# Append per-row frequency summaries to a data frame, leaving the minimum
# blank for rows that hold a single distinct value.
#
# For every row, finds the value that occurs most often and the value that
# occurs least often, together with their counts. Ties go to the value that
# appears first in the row. When all entries of a row are identical there is
# no separate minimum, so `min_res` is "" and `min_num` is 0.
#
# Args:
#   df: a data frame (or matrix); each row is examined independently.
#
# Returns:
#   A data frame with the original columns followed by `max_res`, `max_num`,
#   `min_res` and `min_num`. apply() works on a matrix, so character input
#   yields character columns throughout, including the counts.
g <- function(df) {
  X <- as.data.frame(t(apply(df, 1, function(row) {
    u <- unique(row)
    # Count with match()/tabulate() rather than outer(u, row, "=="): `==`
    # returns NA for missing entries, which made every count NA and broke
    # the row. match() pairs NA with NA, so NA is counted like any value.
    n <- as.numeric(tabulate(match(row, u), nbins = length(u)))
    if (length(u) == 1) {
      # Single distinct value: report an empty minimum instead of repeating it.
      c(row, u[which.max(n)], max(n), "", 0)
    } else {
      c(row, u[which.max(n)], max(n), u[which.min(n)], min(n))
    }
  })))
  colnames(X) <- c(colnames(df), "max_res", "max_num", "min_res", "min_num")
  X
}
> f <- function(df)
+ {
+ X <- as.data.frame(t(apply( df, 1,
+ function(row)
+ {
+ .... [TRUNCATED]
> g <- function(df)
+ {
+ X <- as.data.frame(t(apply( df, 1,
+ function(row)
+ {
+ .... [TRUNCATED]
> #-------------------------------------------------------------------------
>
> myFunc <- function(x) {
+ temp <- as.data.frame(sort(table(x), dec .... [TRUNCATED]
> #===========================================================
>
> example <- data.frame( x=c("a","b","d","g"),
+ y=c("b","b", .... [TRUNCATED]
> system.time(
+ for ( n in 1:1000) { X <- f(example) }
+ )
user system elapsed
2.08 0.00 2.17
> X
x y z max_res max_num min_res min_num
1 a b a a 2 b 1
2 b b c b 2 c 1
3 d e d d 2 e 1
4 g g g g 3 g 3
> system.time(
+ for ( n in 1:1000) { Y <- g(example) }
+ )
user system elapsed
2.28 0.00 2.41
> Y
x y z max_res max_num min_res min_num
1 a b a a 2 b 1
2 b b c b 2 c 1
3 d e d d 2 e 1
4 g g g g 3 0
> system.time(
+ for ( n in 1:1000) { df <- example
+ df[c("max_res", "max_num", "min_res", "min_num")] <- do.call(rbind, appl .... [TRUNCATED]
user system elapsed
11.95 0.02 12.56
> df
x y z max_res max_num min_res min_num
1 a b a a 2 b 1
2 b b c b 2 c 1
3 d e d d 2 e 1
4 g g g g 3 0
> #=====================================================================
>
> set.seed(1)
> example_2 <- as.data.frame( matrix(sample(1:5,60,replace=TRUE),5,12))
> example_2
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
1 2 5 2 3 5 2 3 4 5 4 3 1
2 2 5 1 4 2 1 3 4 4 1 5 2
3 3 4 4 5 4 2 3 1 4 3 3 3
4 5 4 2 2 1 5 1 4 3 4 2 4
5 2 1 4 4 2 2 5 3 3 4 1 3
> system.time(
+ for ( n in 1:1000) { X <- f(example_2) }
+ )
user system elapsed
0.85 0.00 0.84
> X
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 max_res max_num min_res min_num
1 2 5 2 3 5 2 3 4 5 4 3 1 2 3 1 1
2 2 5 1 4 2 1 3 4 4 1 5 2 2 3 3 1
3 3 4 4 5 4 2 3 1 4 3 3 3 3 5 5 1
4 5 4 2 2 1 5 1 4 3 4 2 4 4 4 3 1
5 2 1 4 4 2 2 5 3 3 4 1 3 2 3 5 1
> system.time(
+ for ( n in 1:1000) { Y <- g(example_2) }
+ )
user system elapsed
0.91 0.00 0.92
> Y
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 max_res max_num min_res min_num
1 2 5 2 3 5 2 3 4 5 4 3 1 2 3 1 1
2 2 5 1 4 2 1 3 4 4 1 5 2 2 3 3 1
3 3 4 4 5 4 2 3 1 4 3 3 3 3 5 5 1
4 5 4 2 2 1 5 1 4 3 4 2 4 4 4 3 1
5 2 1 4 4 2 2 5 3 3 4 1 3 2 3 5 1
> system.time(
+ for ( n in 1:1000) { df <- example_2
+ df[c("max_res", "max_num", "min_res", "min_num")] <- do.call(rbind, a .... [TRUNCATED]
Error in `[<-.data.frame`(`*tmp*`, 1, value = c("2", "3", "5", "4", "1" :
replacement has 5 rows, data has 1
Timing stopped at: 0 0 0