如何根据R中列的条件填充NA行
以下是一个例子:
# Example data: v4..v6 are missing (NA) on the "1n" and "2p" rows.
df <- data.frame(
  v1 = rep(1:2, times = 4),
  v2 = rep(c("a", "b"), each = 4),
  v3 = paste0(
    rep(1:2, each = 4),
    rep(c("m", "n", "o", "p"), each = 2)
  ),
  v4 = c(1, 2, NA, NA, 3, 4, NA, NA),
  v5 = c(5, 6, NA, NA, 7, 8, NA, NA),
  v6 = c(9, 10, NA, NA, 11, 12, NA, NA)
)
df
v1 v2 v3 v4 v5 v6
1 1 a 1m 1 5 9
2 2 a 1m 2 6 10
3 1 a 1n NA NA NA
4 2 a 1n NA NA NA
5 1 b 2o 3 7 11
6 2 b 2o 4 8 12
7 1 b 2p NA NA NA
8 2 b 2p NA NA NA
我不知道,但我认为这是一种更简单的产生结果的方法
library(tidyverse)

# Within each (v1, v2) group, fill the NA values of v4..v6 downward
# from the preceding row of the same group.
df %>%
  group_by(v1, v2) %>%
  fill(v4:v6, .direction = "down")
添加v3逻辑
使用 zoo 包的 na.locf 函数:
library(zoo)
library(data.table)

# Convert df to a data.table by reference, then carry the last non-NA value
# forward within each (v1, v2) group.
# na.rm = FALSE keeps rows of a group that starts with NA; the na.locf
# default (na.rm = TRUE) would drop those rows from the result.
setDT(df)[, na.locf(.SD, na.rm = FALSE), by = .(v1, v2)]
# v1 v2 v3 v4 v5 v6
#1: 1 a 1m 1 5 9
#2: 1 a 1n 1 5 9
#3: 2 a 1m 2 6 10
#4: 2 a 1n 2 6 10
#5: 1 b 2o 3 7 11
#6: 1 b 2p 3 7 11
#7: 2 b 2o 4 8 12
#8: 2 b 2p 4 8 12
如果要在“v3”中添加条件
# Columns whose NA values are filled in place.
fill_cols <- c("v4", "v5", "v6")

# Group by v1, v2 and v3 with its first run of non-digit characters removed,
# then carry the last non-NA value forward inside each group.
# na.rm = FALSE keeps the result the same length as the group, which `:=`
# requires; the na.locf default would drop leading-NA rows.
# The trailing [] prints the updated table.
setDT(df)[
  ,
  (fill_cols) := na.locf(.SD, na.rm = FALSE),
  by = .(v1, v2, sub("\\D+", "", v3)),
  .SDcols = fill_cols
][]
# v1 v2 v3 v4 v5 v6
#1: 1 a 1m 1 5 9
#2: 2 a 1m 2 6 10
#3: 1 a 1n 1 5 9
#4: 2 a 1n 2 6 10
#5: 1 b 2o 3 7 11
#6: 2 b 2o 4 8 12
#7: 1 b 2p 3 7 11
#8: 2 b 2p 4 8 12
这里有一个解决方案:将 v3 重新编码为只保留其数字部分的新变量 v7,再按 v1、v2、v7 分组填充:
library(dplyr)
library(stringr)
library(tidyr) # fill() is exported by tidyr, not by dplyr or stringr

# Extract the numeric part of the string in v3 into a helper column v7
df$v7 <- str_extract(df$v3, "[[:digit:]]+")

# Fill NA values of v4..v6 downward within each (v1, v2, v7) group,
# then drop the grouping so the result is a plain data frame.
df %>%
  group_by(v1, v2, v7) %>%
  fill(v4:v6) %>%
  ungroup()
library(dplyr)
library(stringr)
library(tidyr) # fill() is exported by tidyr, not by dplyr or stringr

# Extract the numeric part of the string in v3 into a helper column v7
df$v7 <- str_extract(df$v3, "[[:digit:]]+")

# Fill NA values of v4..v6 downward within each (v1, v2, v7) group,
# then drop the grouping so the result is a plain data frame.
df %>%
  group_by(v1, v2, v7) %>%
  fill(v4:v6) %>%
  ungroup()
下面是一个使用 data.table 和 zoo 的解决方案,分组时忽略 v3 列的最后一个字母:
library(data.table)

# Columns whose NA values are filled in place.
fill_cols <- c("v4", "v5", "v6")

# Group by v1, v2 and v3 without its last character, then carry the last
# non-NA value forward inside each group. Grouping on the columns directly
# (instead of a pasted key with no separator) keeps distinct groups from
# colliding and needs no temporary columns.
# na.rm = FALSE keeps leading NAs so each result has the group's length.
setDT(df)[
  ,
  (fill_cols) := lapply(.SD, zoo::na.locf, na.rm = FALSE),
  by = .(v1, v2, v3_prefix = substr(v3, 1, nchar(as.character(v3)) - 1)),
  .SDcols = fill_cols
]
df
# v1 v2 v3 v4 v5 v6
#1: 1 a 1m 1 5 9
#2: 2 a 1m 2 6 10
#3: 1 a 1n 1 5 9
#4: 2 a 1n 2 6 10
#5: 1 b 2o 3 7 11
#6: 2 b 2o 4 8 12
#7: 1 b 2p 3 7 11
#8: 2 b 2p 4 8 12
到目前为止你试过什么?如果我们能看到一些代码尝试,您的逻辑可能会更清晰
library(dplyr)
library(stringr)
library(tidyr) # fill() is exported by tidyr, not by dplyr or stringr

# Extract the numeric part of the string in v3 into a helper column v7
df$v7 <- str_extract(df$v3, "[[:digit:]]+")

# Fill NA values of v4..v6 downward within each (v1, v2, v7) group,
# then drop the grouping so the result is a plain data frame.
df %>%
  group_by(v1, v2, v7) %>%
  fill(v4:v6) %>%
  ungroup()
library(data.table)

# Columns whose NA values are filled in place.
fill_cols <- c("v4", "v5", "v6")

# Group by v1, v2 and v3 without its last character, then carry the last
# non-NA value forward inside each group. Grouping on the columns directly
# (instead of a pasted key with no separator) keeps distinct groups from
# colliding and needs no temporary columns.
# na.rm = FALSE keeps leading NAs so each result has the group's length.
setDT(df)[
  ,
  (fill_cols) := lapply(.SD, zoo::na.locf, na.rm = FALSE),
  by = .(v1, v2, v3_prefix = substr(v3, 1, nchar(as.character(v3)) - 1)),
  .SDcols = fill_cols
]
df
# v1 v2 v3 v4 v5 v6
#1: 1 a 1m 1 5 9
#2: 2 a 1m 2 6 10
#3: 1 a 1n 1 5 9
#4: 2 a 1n 2 6 10
#5: 1 b 2o 3 7 11
#6: 2 b 2o 4 8 12
#7: 1 b 2p 3 7 11
#8: 2 b 2p 4 8 12