R_如何用其他两列的平均值填充一列中的空白?

R_如何用其他两列的平均值填充一列中的空格?,r,R,我试图用var2和var3的平均值来填补var1中的空白,但我无法让它工作。这就是我迄今为止所尝试的: df <- data.frame(var1=c(1,2,"",3,3,"","",2,2,6,7,3,"","","",3,3,11,12,2,"",3)) df$var2 <- c(1,8,9,1,1,5,8,8,3,2,0,9,4,4,7,3,5,5,2,4,6,6) df$var3 <- c(4,1,1,4,4,6,7,8,9,10,11,12,13,14,15,16,

我试图用var2和var3的平均值来填补var1中的空白,但我无法让它工作。这就是我迄今为止所尝试的:

# Example data. Mixing numbers with "" makes c() return a character vector,
# so var1 is not numeric (it becomes a factor under the pre-4.0
# stringsAsFactors default).
df <- data.frame(var1=c(1,2,"",3,3,"","",2,2,6,7,3,"","","",3,3,11,12,2,"",3))
df$var2 <- c(1,8,9,1,1,5,8,8,3,2,0,9,4,4,7,3,5,5,2,4,6,6)
df$var3 <- c(4,1,1,4,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22)


 # Attempted fill, kept exactly as posted. It leaves var1 unchanged because:
 #  - is.na() is FALSE for "" (an empty string is not NA), so the
 #    assignment in the second argument is never evaluated;
 #  - mean(a, b) passes b as mean()'s second argument (`trim`) instead of
 #    averaging a and b;
 #  - the third argument uses `==`, which compares rather than assigns.
 for(i in 1:length(df$var1)) {
   ifelse(is.na(df$var1[i]), df$var1[i] <- mean(df$var2[i], df$var3[i]), df$var1[i] == df$var1[i])
 }
df

试试这个:

# Example data. The "" entries force var1 to be stored as character.
df <- data.frame(
  var1 = c(1, 2, "", 3, 3, "", "", 2, 2, 6, 7, 3, "", "", "", 3, 3, 11, 12,
           2, "", 3),
  var2 = c(1, 8, 9, 1, 1, 5, 8, 8, 3, 2, 0, 9, 4, 4, 7, 3, 5, 5, 2, 4, 6, 6),
  var3 = c(4, 1, 1, 4, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
           20, 21, 22),
  stringsAsFactors = FALSE
)

# as.numeric() maps "" to a real NA, so no "NA" sentinel string is needed
# and var1 ends up numeric instead of character.
df$var1 <- as.numeric(df$var1)

# Fill every missing var1 in one step with the row mean of var2 and var3.
# drop = FALSE keeps a data frame for rowMeans() even if only one row matches.
blank <- is.na(df$var1)
df$var1[blank] <- rowMeans(df[blank, c("var2", "var3"), drop = FALSE])

df

没有任何循环的另一种方式:

library(dplyr)

# Coerce all three columns to numeric (the "" entries in var1 become NA),
# then fill each missing var1 with the mean of var2 and var3 from that row.
# across() replaces the superseded mutate_at(); coalesce() keeps var1 where
# it is present and falls back to the row mean where it is NA.
df %>%
  mutate(across(var1:var3, as.numeric)) %>%
  mutate(var1 = coalesce(var1, (var2 + var3) / 2))
#>    var1 var2 var3
#> 1   1.0    1    4
#> 2   2.0    8    1
#> 3   5.0    9    1
#> 4   3.0    1    4
#> 5   3.0    1    4
#> 6   5.5    5    6
#> 7   7.5    8    7
#> 8   2.0    8    8
#> 9   2.0    3    9
#> 10  6.0    2   10
#> 11  7.0    0   11
#> 12  3.0    9   12
#> 13  8.5    4   13
#> 14  9.0    4   14
#> 15 11.0    7   15
#> 16  3.0    3   16
#> 17  3.0    5   17
#> 18 11.0    5   18
#> 19 12.0    2   19
#> 20  2.0    4   20
#> 21 13.5    6   21
#> 22  3.0    6   22

这里我将使用data.table方法。它应该能很好地处理较大的数据,并避免在您不需要的数据上循环

library(data.table)
# Example data. The "" entries make var1 a character column on creation.
dt <- data.table(
  var1 = c(1, 2, "", 3, 3, "", "", 2, 2, 6, 7, 3, "", "", "", 3, 3, 11, 12,
           2, "", 3),
  var2 = c(1, 8, 9, 1, 1, 5, 8, 8, 3, 2, 0, 9, 4, 4, 7, 3, 5, 5, 2, 4, 6, 6),
  var3 = c(4, 1, 1, 4, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
           20, 21, 22)
)

# as.numeric() turns "" into NA, leaving a numeric column to fill.
dt[, var1 := as.numeric(var1)]

# Update by reference only the rows where var1 is missing. rowMeans()
# averages the .SDcols columns row-wise, avoiding apply()'s matrix coercion
# and per-row function calls.
dt[is.na(var1), var1 := rowMeans(.SD), .SDcols = c("var2", "var3")]

dt


    var1 var2 var3
 1:  1.0    1    4
 2:  2.0    8    1
 3:  5.0    9    1
 4:  3.0    1    4
 5:  3.0    1    4
 6:  5.5    5    6
 7:  7.5    8    7
 8:  2.0    8    8
 9:  2.0    3    9
10:  6.0    2   10
11:  7.0    0   11
12:  3.0    9   12
13:  8.5    4   13
14:  9.0    4   14
15: 11.0    7   15
16:  3.0    3   16
17:  3.0    5   17
18: 11.0    5   18
19: 12.0    2   19
20:  2.0    4   20
21: 13.5    6   21
22:  3.0    6   22
library(data.table)

dt

在循环之前添加此项:
df[df==""]
# Mark empty strings as missing, then make the first column numeric so the
# computed means are stored as numbers rather than coerced to text.
# as.character() first keeps this correct if the column is a factor.
df[df == ""] <- NA
df[[1]] <- as.numeric(as.character(df[[1]]))

# Fill every missing value in column 1 with the row mean of columns 2 and 3.
# This is vectorised: no scalar ifelse() and no assignments inside arguments.
# drop = FALSE keeps a data frame for rowMeans() even if one row matches.
missing_rows <- is.na(df[[1]])
df[missing_rows, 1] <- rowMeans(df[missing_rows, 2:3, drop = FALSE])
library(dplyr)

# Coerce all three columns to numeric (the "" entries in var1 become NA),
# then fill each missing var1 with the mean of var2 and var3 from that row.
# across() replaces the superseded mutate_at(); coalesce() keeps var1 where
# it is present and falls back to the row mean where it is NA.
df %>%
  mutate(across(var1:var3, as.numeric)) %>%
  mutate(var1 = coalesce(var1, (var2 + var3) / 2))
#>    var1 var2 var3
#> 1   1.0    1    4
#> 2   2.0    8    1
#> 3   5.0    9    1
#> 4   3.0    1    4
#> 5   3.0    1    4
#> 6   5.5    5    6
#> 7   7.5    8    7
#> 8   2.0    8    8
#> 9   2.0    3    9
#> 10  6.0    2   10
#> 11  7.0    0   11
#> 12  3.0    9   12
#> 13  8.5    4   13
#> 14  9.0    4   14
#> 15 11.0    7   15
#> 16  3.0    3   16
#> 17  3.0    5   17
#> 18 11.0    5   18
#> 19 12.0    2   19
#> 20  2.0    4   20
#> 21 13.5    6   21
#> 22  3.0    6   22
library(data.table)
# Example data. The "" entries make var1 a character column on creation.
dt <- data.table(
  var1 = c(1, 2, "", 3, 3, "", "", 2, 2, 6, 7, 3, "", "", "", 3, 3, 11, 12,
           2, "", 3),
  var2 = c(1, 8, 9, 1, 1, 5, 8, 8, 3, 2, 0, 9, 4, 4, 7, 3, 5, 5, 2, 4, 6, 6),
  var3 = c(4, 1, 1, 4, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
           20, 21, 22)
)

# as.numeric() turns "" into NA, leaving a numeric column to fill.
dt[, var1 := as.numeric(var1)]

# Update by reference only the rows where var1 is missing. rowMeans()
# averages the .SDcols columns row-wise, avoiding apply()'s matrix coercion
# and per-row function calls.
dt[is.na(var1), var1 := rowMeans(.SD), .SDcols = c("var2", "var3")]

dt


    var1 var2 var3
 1:  1.0    1    4
 2:  2.0    8    1
 3:  5.0    9    1
 4:  3.0    1    4
 5:  3.0    1    4
 6:  5.5    5    6
 7:  7.5    8    7
 8:  2.0    8    8
 9:  2.0    3    9
10:  6.0    2   10
11:  7.0    0   11
12:  3.0    9   12
13:  8.5    4   13
14:  9.0    4   14
15: 11.0    7   15
16:  3.0    3   16
17:  3.0    5   17
18: 11.0    5   18
19: 12.0    2   19
20:  2.0    4   20
21: 13.5    6   21
22:  3.0    6   22