R中多列的条件平均值?
我的数据如下:R中多列的条件平均值?,r,for-loop,mean,R,For Loop,Mean,我的数据如下: train <- data.frame(y=c(1,2,1,1), x1=c(2,4,NA,5), x2=c(8,NA,6,12)) 列车库(dplyr) 列车%>% 组别(y)%>% 在(变量(-y)、函数(v)处进行变异{ 如果其他(是.na(v),平均值(v,na.rm=TRUE),v) }) %>% 解组() ##一个tibble:4x3 #y x1 x2 # #1 1 2 8 #2 2 4南 #3 1 3.5
# Example data: `y` is the grouping key; `x1` and `x2` each contain one NA.
train <- data.frame(
  y = c(1, 2, 1, 1),
  x1 = c(2, 4, NA, 5),
  x2 = c(8, NA, 6, 12)
)
library(dplyr)
train %>%
  group_by(y) %>%
  mutate_at(vars(-y), function(v) {
    ifelse(is.na(v), mean(v, na.rm = TRUE), v)
  }) %>%
  ungroup()
## A tibble: 4 x 3
# y x1 x2
#
#1 1 2 8
#2 2 4 NaN
#3 1 3.5 6
#4 1 5 12
在按“y”列分组后,我们可以使用na.aggregate
library(dplyr)
library(zoo)
# Group by `y`, then fill the NAs of every non-grouping column from the
# values observed in the same group (na.aggregate from zoo).
train %>%
  group_by(y) %>%
  mutate_at(
    vars(-one_of(group_vars(.))),
    function(col) {
      # A group with no observed value has nothing to average: keep it NA.
      if (all(is.na(col))) {
        return(NA_real_)
      }
      na.aggregate(col)
    }
  )
# A tibble: 4 x 3
# Groups: y [2]
# y x1 x2
# <dbl> <dbl> <dbl>
#1 1 2 8
#2 2 4 NA
#3 1 3.5 6
#4 1 5 12
# Split/apply/combine: cut the non-key columns into one piece per value of
# `y`, run na.aggregate on each piece, then put the rows back in their
# original order. local() keeps the intermediates out of the workspace.
train[-1] <- local({
  pieces <- split(train[-1], train$y)
  filled <- lapply(pieces, na.aggregate)
  unsplit(filled, train$y)
})
也可以考虑在 base R 中使用 ifelse 判断 is.na 条件,并用 ave 计算分组平均值:
# Iterate through all columns but the first (the grouping key `y`).
# Each NA is replaced by the mean of the non-missing values in the same
# y-group. The column list is derived from the data rather than hard-coded,
# so the loop really covers every non-key column.
# A group with no observed value yields NaN (see the printed result below).
for (i in names(train)[-1]) {
  train[[i]] <- ifelse(
    test = is.na(train[[i]]),
    # ave() returns a vector aligned with the rows holding each group's mean.
    yes = ave(train[[i]], train$y, FUN = function(x) mean(x, na.rm = TRUE)),
    no = train[[i]]
  )
}

train
# y x1 x2
# 1 1 2.0 8
# 2 2 4.0 NaN
# 3 1 3.5 6
# 4 1 5.0 12
#遍历除第一列以外的所有列
for(i in c("x1", "x2")) {
train[[i]] …
也可以尝试:library(zoo); library(dplyr); train %>% group_by(y) %>% mutate_all(na.aggregate)
对于第 2 行(即 y=2),'x2' 是 NA,该组没有可用于求平均的观测值,因此结果仍然是 NA。
# Split/apply/combine: cut the non-key columns into one piece per value of
# `y`, run na.aggregate on each piece, then put the rows back in their
# original order. local() keeps the intermediates out of the workspace.
train[-1] <- local({
  pieces <- split(train[-1], train$y)
  filled <- lapply(pieces, na.aggregate)
  unsplit(filled, train$y)
})
# Iterate through all columns but the first (the grouping key `y`).
# Each NA is replaced by the mean of the non-missing values in the same
# y-group. The column list is derived from the data rather than hard-coded,
# so the loop really covers every non-key column.
# A group with no observed value yields NaN (see the printed result below).
for (i in names(train)[-1]) {
  train[[i]] <- ifelse(
    test = is.na(train[[i]]),
    # ave() returns a vector aligned with the rows holding each group's mean.
    yes = ave(train[[i]], train$y, FUN = function(x) mean(x, na.rm = TRUE)),
    no = train[[i]]
  )
}

train
# y x1 x2
# 1 1 2.0 8
# 2 2 4.0 NaN
# 3 1 3.5 6
# 4 1 5.0 12