在数据框(data frame)中添加列,记录每行出现次数最多的值及其出现次数

在dataframe中添加每行最大出现次数的列,r,dataframe,R,Dataframe,我有以下数据框: > example var1 var2 var3 1 a b a 2 b b c 3 d e d 4 g g g 我想添加四个新列: 每行的最大出现次数及其出现次数和最小出现次数。 结果将是: > example var1 var2 var3 max_res max_num min_res min_num 1 a b a a 2 b

我有以下数据框:

> example
  var1 var2 var3
1    a    b    a
2    b    b    c
3    d    e    d
4    g    g    g
我想添加四个新列:每行中出现次数最多的值(max_res)及其出现次数(max_num),以及出现次数最少的值(min_res)及其出现次数(min_num)。如果一行中所有值都相同,则 min_res 为空、min_num 为 0。期望的结果是:

> example
  var1 var2 var3 max_res max_num min_res min_num
1    a    b    a      a     2       b       1
2    b    b    c      b     2       c       1
3    d    e    d      d     2       e       1
4    g    g    g      g     3               0
示例中的所有值都是单个字母,但实际数据中的值也可以是数字,或由多个字符组成。
我该怎么做呢?

我想不出一个有效的方法来实现这一点,所以这里有一个低效的方法

定义辅助函数

# Summarise the most and least frequent values of one row.
#
# Args:
#   x: an atomic vector holding the values of a single row (for example the
#      argument that `apply(df, 1, myFunc)` passes in).
#
# Returns:
#   A one-row data frame with the columns
#     max_res - the most frequent value, as a character string
#     max_num - how many times max_res occurs
#     min_res - the least frequent value, or "" when the row has only one
#               distinct value
#     min_num - how many times min_res occurs, or 0 when the row has only one
#               distinct value
#   If `x` has no non-NA values, max_res is NA and both counts are 0.
#
# Values and counts are read directly from the sorted frequency table, so the
# function works for any number of distinct values; the earlier version
# assumed at most two and relied on the table's row names carrying the values.
myFunc <- function(x) {
  # table() drops NA values by default; sorting puts the most frequent value
  # first and the least frequent value last.
  freq <- sort(table(x), decreasing = TRUE)
  values <- names(freq)
  counts <- as.integer(freq)
  n_distinct <- length(counts)

  # Defaults describe the "nothing to report" case.
  res <- data.frame(
    max_res = NA_character_,
    max_num = 0L,
    min_res = "",
    min_num = 0L,
    stringsAsFactors = FALSE
  )
  if (n_distinct == 0L) {
    return(res)
  }

  res$max_res <- values[1L]
  res$max_num <- counts[1L]

  # A minimum is only reported when there is a second distinct value.
  if (n_distinct > 1L) {
    res$min_res <- values[n_distinct]
    res$min_num <- counts[n_distinct]
  }
  res
}

与 myFunc 相比,下面的方法似乎更快:

# Append per-row frequency summary columns to a data frame.
#
# For every row of `df` the distinct values are counted, and four columns are
# appended after the original ones:
#   max_res / max_num - the most frequent value and its count
#   min_res / min_num - the least frequent value and its count
# Ties are resolved in favour of the value that appears first in the row.
# When a row holds a single distinct value, the min_* columns repeat the
# max_* columns.
#
# apply() converts `df` to a matrix, so all returned columns share one type.
f <- function(df)
{
  summarise_row <- function(row)
  {
    distinct <- unique(row)
    # counts[i] is the number of cells in the row equal to distinct[i]
    counts <- rowSums(outer(distinct, row, "=="))
    most <- which.max(counts)
    least <- which.min(counts)
    c(row, distinct[most], max(counts), distinct[least], min(counts))
  }

  # apply() returns one column per input row, hence the transpose.
  X <- as.data.frame(t(apply(df, 1, summarise_row)))
  colnames(X) <- c(colnames(df), "max_res", "max_num", "min_res", "min_num")
  X
}

# Append per-row frequency summary columns, blanking the minimum for rows
# whose values are all identical.
#
# For every row of `df` the distinct values are counted, and four columns are
# appended after the original ones:
#   max_res / max_num - the most frequent value and its count
#   min_res / min_num - the least frequent value and its count; "" and 0 when
#                       the row contains exactly one distinct value
# Ties are resolved in favour of the value that appears first in the row.
#
# apply() converts `df` to a matrix, so all returned columns share one type.
g <- function(df)
{
  summarise_row <- function(row)
  {
    distinct <- unique(row)
    # counts[i] is the number of cells in the row equal to distinct[i]
    counts <- rowSums(outer(distinct, row, "=="))
    most <- which.max(counts)

    if (length(distinct) != 1)
    {
      least <- which.min(counts)
      c(row, distinct[most], max(counts), distinct[least], min(counts))
    }
    else
    {
      # Only one distinct value: there is no separate minimum to report.
      c(row, distinct[most], max(counts), "", 0)
    }
  }

  # apply() returns one column per input row, hence the transpose.
  X <- as.data.frame(t(apply(df, 1, summarise_row)))
  colnames(X) <- c(colnames(df), "max_res", "max_num", "min_res", "min_num")
  X
}
# Append per-row frequency summary columns to a data frame.
#
# For every row of `df` the distinct values are counted, and four columns are
# appended after the original ones:
#   max_res / max_num - the most frequent value and its count
#   min_res / min_num - the least frequent value and its count
# Ties are resolved in favour of the value that appears first in the row.
# When a row holds a single distinct value, the min_* columns repeat the
# max_* columns.
#
# apply() converts `df` to a matrix, so all returned columns share one type.
f <- function(df)
{
  summarise_row <- function(row)
  {
    distinct <- unique(row)
    # counts[i] is the number of cells in the row equal to distinct[i]
    counts <- rowSums(outer(distinct, row, "=="))
    most <- which.max(counts)
    least <- which.min(counts)
    c(row, distinct[most], max(counts), distinct[least], min(counts))
  }

  # apply() returns one column per input row, hence the transpose.
  X <- as.data.frame(t(apply(df, 1, summarise_row)))
  colnames(X) <- c(colnames(df), "max_res", "max_num", "min_res", "min_num")
  X
}

# Append per-row frequency summary columns, blanking the minimum for rows
# whose values are all identical.
#
# For every row of `df` the distinct values are counted, and four columns are
# appended after the original ones:
#   max_res / max_num - the most frequent value and its count
#   min_res / min_num - the least frequent value and its count; "" and 0 when
#                       the row contains exactly one distinct value
# Ties are resolved in favour of the value that appears first in the row.
#
# apply() converts `df` to a matrix, so all returned columns share one type.
g <- function(df)
{
  summarise_row <- function(row)
  {
    distinct <- unique(row)
    # counts[i] is the number of cells in the row equal to distinct[i]
    counts <- rowSums(outer(distinct, row, "=="))
    most <- which.max(counts)

    if (length(distinct) != 1)
    {
      least <- which.min(counts)
      c(row, distinct[most], max(counts), distinct[least], min(counts))
    }
    else
    {
      # Only one distinct value: there is no separate minimum to report.
      c(row, distinct[most], max(counts), "", 0)
    }
  }

  # apply() returns one column per input row, hence the transpose.
  X <- as.data.frame(t(apply(df, 1, summarise_row)))
  colnames(X) <- c(colnames(df), "max_res", "max_num", "min_res", "min_num")
  X
}
> f <- function(df)
+ {
+   X <- as.data.frame(t(apply( df, 1,
+                               function(row)
+                               {
+       .... [TRUNCATED] 

> g <- function(df)
+ {
+   X <- as.data.frame(t(apply( df, 1,
+                               function(row)
+                               {
+       .... [TRUNCATED] 

> #-------------------------------------------------------------------------
> 
> myFunc <- function(x) {
+   temp <- as.data.frame(sort(table(x), dec .... [TRUNCATED] 

> #===========================================================
> 
> example <- data.frame( x=c("a","b","d","g"),
+                        y=c("b","b", .... [TRUNCATED] 

> system.time(
+   for ( n in 1:1000) { X <- f(example) }  
+ )
   user  system elapsed 
   2.08    0.00    2.17 

> X
  x y z max_res max_num min_res min_num
1 a b a       a       2       b       1
2 b b c       b       2       c       1
3 d e d       d       2       e       1
4 g g g       g       3       g       3

> system.time(
+   for ( n in 1:1000) { Y <- g(example) }  
+ )
   user  system elapsed 
   2.28    0.00    2.41 

> Y
  x y z max_res max_num min_res min_num
1 a b a       a       2       b       1
2 b b c       b       2       c       1
3 d e d       d       2       e       1
4 g g g       g       3               0

> system.time(
+   for ( n in 1:1000) { df <- example
+                       df[c("max_res", "max_num", "min_res", "min_num")] <- do.call(rbind, appl .... [TRUNCATED] 
   user  system elapsed 
  11.95    0.02   12.56 

> df
  x y z max_res max_num min_res min_num
1 a b a       a       2       b       1
2 b b c       b       2       c       1
3 d e d       d       2       e       1
4 g g g       g       3               0

> #=====================================================================
> 
> set.seed(1)

> example_2 <- as.data.frame( matrix(sample(1:5,60,replace=TRUE),5,12))

> example_2
  V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
1  2  5  2  3  5  2  3  4  5   4   3   1
2  2  5  1  4  2  1  3  4  4   1   5   2
3  3  4  4  5  4  2  3  1  4   3   3   3
4  5  4  2  2  1  5  1  4  3   4   2   4
5  2  1  4  4  2  2  5  3  3   4   1   3

> system.time(
+   for ( n in 1:1000) { X <- f(example_2) }  
+ )
   user  system elapsed 
   0.85    0.00    0.84 

> X
  V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 max_res max_num min_res min_num
1  2  5  2  3  5  2  3  4  5   4   3   1       2       3       1       1
2  2  5  1  4  2  1  3  4  4   1   5   2       2       3       3       1
3  3  4  4  5  4  2  3  1  4   3   3   3       3       5       5       1
4  5  4  2  2  1  5  1  4  3   4   2   4       4       4       3       1
5  2  1  4  4  2  2  5  3  3   4   1   3       2       3       5       1

> system.time(
+   for ( n in 1:1000) { Y <- g(example_2) }  
+ )
   user  system elapsed 
   0.91    0.00    0.92 

> Y
  V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 max_res max_num min_res min_num
1  2  5  2  3  5  2  3  4  5   4   3   1       2       3       1       1
2  2  5  1  4  2  1  3  4  4   1   5   2       2       3       3       1
3  3  4  4  5  4  2  3  1  4   3   3   3       3       5       5       1
4  5  4  2  2  1  5  1  4  3   4   2   4       4       4       3       1
5  2  1  4  4  2  2  5  3  3   4   1   3       2       3       5       1

> system.time(
+   for ( n in 1:1000) { df <- example_2
+                        df[c("max_res", "max_num", "min_res", "min_num")] <- do.call(rbind, a .... [TRUNCATED] 
Error in `[<-.data.frame`(`*tmp*`, 1, value = c("2", "3", "5", "4", "1" : 
  replacement has 5 rows, data has 1
Timing stopped at: 0 0 0