R 中按单个列分组计算其他所有列的均值_R_Dataframe_Unique_Average

R 中按单个列分组计算其他所有列的均值

r dataframe

R 中按单个列分组计算其他所有列的均值,r,dataframe,unique,average,R,Dataframe,Unique,Average,我有一个超过40000列的大型数据框，并且遇到了类似的问题。我目前正在使用嵌套for循环，如下所示。尽管R功能丰富，但我相信应该有一种更快的方法来实现这一点： idx <- split(1:nrow(shop), shop$shop_id) newdata <- data.frame() for( i in 1:length(idx)){ newdata[i,1]<-c(names(idx)[i] ) for (j in 2:ncol(shop)){

我有一个超过40000列的大型数据框，并且遇到了类似的问题。

我目前正在使用嵌套 for 循环，如下所示。尽管 R 功能丰富，但我相信应该有一种更快的方法来实现这一点：

# Row indices of `shop` grouped by shop_id: one integer vector per shop.
# seq_len() yields an empty sequence for a zero-row frame, unlike 1:nrow().
idx <- split(seq_len(nrow(shop)), shop$shop_id)

# One output row per shop: column 1 receives the shop name, the remaining
# columns receive the per-shop mean of the matching column of `shop`.
newdata <- data.frame()

for (i in seq_along(idx)) {
    newdata[i, 1] <- names(idx)[i]
    # Skip column 1 -- assumes shop_id is the first column of `shop`
    # (TODO confirm). seq_len(n)[-1] is empty when there is only one column,
    # whereas 2:ncol(shop) would count backwards to column 1.
    for (j in seq_len(ncol(shop))[-1]) {
        # idx[[i]] extracts the row numbers directly; no unlist() needed.
        newdata[i, j] <- mean(shop[idx[[i]], j])
    }
}

使用 plyr 包中的 ddply 函数：
> # library() fails immediately if plyr is not installed; require() would
> # only return FALSE and let the ddply() call fail later.
> library("plyr")
> # Group by shop_id and compute the mean of each listed column.
> ddply(shop, ~shop_id, summarise, Assets=mean(Assets),
        Liabilities=mean(Liabilities), sale=mean(sale), profit=mean(profit))

  shop_id Assets Liabilities      sale   profit
1  Shop A    8.0    5.333333  8.666667 2.333333
2  Shop B    5.0    9.000000 15.000000 6.000000
3  Shop C    5.5   10.000000 14.000000 7.000000

尝试data.table

library(data.table)
# Convert `shop` to a data.table by reference, then take the mean of every
# non-grouping column (.SD) within each shop_id.
setDT(shop)
shop[, lapply(.SD, mean), by = shop_id]
#  shop_id Assets Liabilities      sale   profit
#1:  Shop A    8.0    5.333333  8.666667 2.333333
#2:  Shop B    5.0    9.000000 15.000000 6.000000
#3:  Shop C    5.5   10.000000 14.000000 7.000000

或者，对于 40000 列，我会使用 data.table，
也可以使用 dplyr。
尝试使用 dplyr：
library("dplyr")
# summarise_each() and funs() are deprecated; across() is the supported way
# to apply one function to every non-grouping column.
shop %>%
  group_by(shop_id) %>%
  summarise(across(everything(), mean))

#   shop_id Assets Liabilities      sale   profit
# 1  Shop A    8.0    5.333333  8.666667 2.333333
# 2  Shop B    5.0    9.000000 15.000000 6.000000
# 3  Shop C    5.5   10.000000 14.000000 7.000000

rowsum 在这里可能会有所帮助：
# Per-group column sums divided by the per-group row counts give the means.
# Column 1 of `shop` is used as the grouping key and excluded from the sums.
rowsum(shop[-1], group = shop[[1]]) / table(shop[[1]])
#       Assets Liabilities      sale   profit
#Shop A    8.0    5.333333  8.666667 2.333333
#Shop B    5.0    9.000000 15.000000 6.000000
#Shop C    5.5   10.000000 14.000000 7.000000

这是一个创新的想法
library(dplyr)
# across() replaces the deprecated summarise_each(funs(...)) idiom: it applies
# mean() to every column except the grouping column.
shop %>%
    group_by(shop_id) %>%
    summarise(across(everything(), mean))
# shop_id Assets Liabilities      sale   profit
#1  Shop A    8.0    5.333333  8.666667 2.333333
#2  Shop B    5.0    9.000000 15.000000 6.000000
#3  Shop C    5.5   10.000000 14.000000 7.000000

# Formula interface: average every other column (`.`) within each shop_id.
aggregate(. ~ shop_id, data = shop, FUN = mean)
#   shop_id Assets Liabilities      sale   profit
#1  Shop A    8.0    5.333333  8.666667 2.333333
#2  Shop B    5.0    9.000000 15.000000 6.000000
#3  Shop C    5.5   10.000000 14.000000 7.000000

library("dplyr")
# Use summarise(across(...)) instead of the deprecated summarise_each(funs()).
# Grouping columns are skipped by across(), so only the value columns are
# averaged.
shop %>%
  group_by(shop_id) %>%
  summarise(across(everything(), mean))

#   shop_id Assets Liabilities      sale   profit
# 1  Shop A    8.0    5.333333  8.666667 2.333333
# 2  Shop B    5.0    9.000000 15.000000 6.000000
# 3  Shop C    5.5   10.000000 14.000000 7.000000

# Group sums of every column but the first, keyed by the first column, then
# divided by each group's row count to obtain the group means.
rowsum(shop[-1], group = shop[[1]]) / table(shop[[1]])
#       Assets Liabilities      sale   profit
#Shop A    8.0    5.333333  8.666667 2.333333
#Shop B    5.0    9.000000 15.000000 6.000000
#Shop C    5.5   10.000000 14.000000 7.000000