Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/reactjs/22.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何根据R中列的条件填充NA行_R - Fatal编程技术网

如何根据R中列的条件填充NA行

如何根据R中列的条件填充NA行,r,R,以下是一个例子: df<-data.frame(v1=rep(1:2, 4), v2=rep(c("a", "b"), each=4), v3=paste0(rep(1:2, each=4), rep(c("m", "n", "o", "p"), each=2)), v4=c(1,2, NA, NA, 3,4, NA,NA), v5=c(5,6, NA, NA, 7

以下是一个例子:

# Example data: v4, v5 and v6 are NA on the "1n" and "2p" rows.
df <- data.frame(
  v1 = rep(1:2, times = 4),
  v2 = rep(c("a", "b"), each = 4),
  v3 = paste0(rep(1:2, each = 4), rep(c("m", "n", "o", "p"), each = 2)),
  v4 = c(1, 2, NA, NA, 3, 4, NA, NA),
  v5 = c(5, 6, NA, NA, 7, 8, NA, NA),
  v6 = c(9, 10, NA, NA, 11, 12, NA, NA)
)

df
  v1 v2 v3 v4 v5 v6
1  1  a 1m  1  5  9
2  2  a 1m  2  6 10
3  1  a 1n NA NA NA
4  2  a 1n NA NA NA
5  1  b 2o  3  7 11
6  2  b 2o  4  8 12
7  1  b 2p NA NA NA
8  2  b 2p NA NA NA

我不知道,但我认为这是一种更简单的产生结果的方法

library(tidyverse)

# Fill missing v4:v6 values from the previous row of the same (v1, v2) group.
# ungroup() drops the grouping afterwards, so later verbs applied to the
# result are not silently evaluated per group.
df %>%
  group_by(v1, v2) %>%
  fill(v4:v6) %>%
  ungroup()
添加v3逻辑
使用 `zoo` 包的 `na.locf`:

library(zoo)
library(data.table)

# Convert df to a data.table by reference, then carry the last observation
# forward over the remaining columns within each (v1, v2) group.
setDT(df)
df[, na.locf(.SD), by = .(v1, v2)]
#    v1 v2 v3 v4 v5 v6
#1:  1  a 1m  1  5  9
#2:  1  a 1n  1  5  9
#3:  2  a 1m  2  6 10
#4:  2  a 1n  2  6 10
#5:  1  b 2o  3  7 11
#6:  1  b 2p  3  7 11
#7:  2  b 2o  4  8 12
#8:  2  b 2p  4  8 12

如果要在“v3”中添加条件

# Fill columns 4:6 (v4, v5, v6) in place, grouping by v1, v2 and the numeric
# part of v3 (sub() strips the first run of non-digits, e.g. "1m" -> "1").
# na.rm = FALSE keeps leading NAs, so every group's result has the same number
# of rows as the group itself and `:=` never receives a shortened value.
setDT(df)
fill_cols <- names(df)[4:6]
df[,
  (fill_cols) := na.locf(.SD, na.rm = FALSE),
  by = .(v1, v2, sub("\\D+", "", v3)),
  .SDcols = fill_cols
][]
#   v1 v2 v3 v4 v5 v6
#1:  1  a 1m  1  5  9
#2:  2  a 1m  2  6 10
#3:  1  a 1n  1  5  9
#4:  2  a 1n  2  6 10
#5:  1  b 2o  3  7 11
#6:  2  b 2o  4  8 12
#7:  1  b 2p  3  7 11
#8:  2  b 2p  4  8 12

这里有一个解决方案,它将 `v3` 重新编码为一个只保留数字部分的新变量:

library(dplyr)
library(stringr)
library(tidyr) # fill() is exported by tidyr, not by dplyr or stringr

# Extract the numeric part of v3 (e.g. "1m" -> "1") to use as a grouping key
df$v7 <- str_extract(df$v3, "[[:digit:]]+")

# Fill v4:v6 from the previous row within each (v1, v2, v7) group, then drop
# the grouping so the result behaves like an ordinary data frame again.
df %>%
  group_by(v1, v2, v7) %>%
  fill(v4:v6) %>%
  ungroup()
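library(dplyr)
library(stringr)
library(tidyr)
# Extract numeric part of the string in v3
df$v7 <- str_extract(df$v3, "[[:digit:]]+")
df %>%
  group_by(v1, v2, v7) %>%
  fill(v4:v6)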

下面是一个使用 `data.table` 和 `zoo` 的解决方案,它忽略 `v3` 列的最后一个字母:

library(data.table)

# Fill v4, v5 and v6 in place within groups defined by v1, v2 and v3 without
# its last character (substr() keeps characters 1 .. nchar - 1).
# Grouping on the columns directly avoids building a pasted key, which has no
# separator and could merge distinct (v1, v2, prefix) combinations.
# na.rm = FALSE keeps leading NAs so each filled column keeps its length.
setDT(df)
fill_cols <- c("v4", "v5", "v6")
df[,
  (fill_cols) := lapply(.SD, zoo::na.locf, na.rm = FALSE),
  by = .(v1, v2, v3_prefix = substr(v3, 1, nchar(as.character(v3)) - 1)),
  .SDcols = fill_cols
]
df

#    v1 v2 v3 v4 v5 v6
#1:  1  a 1m  1  5  9
#2:  2  a 1m  2  6 10
#3:  1  a 1n  1  5  9
#4:  2  a 1n  2  6 10
#5:  1  b 2o  3  7 11
#6:  2  b 2o  4  8 12
#7:  1  b 2p  3  7 11
#8:  2  b 2p  4  8 12

到目前为止你试过什么?如果我们能看到一些代码尝试,您的逻辑可能会更清晰
library(dplyr)
library(stringr)
library(tidyr) # fill() is exported by tidyr, not by dplyr or stringr

# Extract the numeric part of v3 (e.g. "1m" -> "1") to use as a grouping key
df$v7 <- str_extract(df$v3, "[[:digit:]]+")

# Fill v4:v6 from the previous row within each (v1, v2, v7) group, then drop
# the grouping so the result behaves like an ordinary data frame again.
df %>%
  group_by(v1, v2, v7) %>%
  fill(v4:v6) %>%
  ungroup()
library(data.table)

# Fill v4, v5 and v6 in place within groups defined by v1, v2 and v3 without
# its last character (substr() keeps characters 1 .. nchar - 1).
# Grouping on the columns directly avoids building a pasted key, which has no
# separator and could merge distinct (v1, v2, prefix) combinations.
# na.rm = FALSE keeps leading NAs so each filled column keeps its length.
setDT(df)
fill_cols <- c("v4", "v5", "v6")
df[,
  (fill_cols) := lapply(.SD, zoo::na.locf, na.rm = FALSE),
  by = .(v1, v2, v3_prefix = substr(v3, 1, nchar(as.character(v3)) - 1)),
  .SDcols = fill_cols
]
df

#    v1 v2 v3 v4 v5 v6
#1:  1  a 1m  1  5  9
#2:  2  a 1m  2  6 10
#3:  1  a 1n  1  5  9
#4:  2  a 1n  2  6 10
#5:  1  b 2o  3  7 11
#6:  2  b 2o  4  8 12
#7:  1  b 2p  3  7 11
#8:  2  b 2p  4  8 12