R 使用比较非NA标记行的方式有条件地填充NA行
我想根据与最近的非NA标记行之间的差异,有条件地填充NA行。标签:r、function、dplyr、difference。比如说:
# Example data: only the first and last rows carry a label; the four rows in
# between have a missing label and are the ones to be filled.
data <- data.frame(
  sd_value = c(34, 33, 34, 37, 36, 45),
  value = c(383, 428, 437, 455, 508, 509),
  label = c("bad", rep(NA, 4), "unable")
)
> data
sd_value value label
1 34 383 bad
2 33 428 <NA>
3 34 437 <NA>
4 37 455 <NA>
5 36 508 <NA>
6 45 509 unable
我想要标记NA行的条件是:如果 diff_val < 50 且 diff_sd_val < 9,则使用该行之前最近的非NA标签;否则使用该行之后最近的非NA标签。
一个选项是查找NA和非NA索引,并根据条件选择离其最近的标签:
library(dplyr)
# Create a new data frame with diff_val and diff_sd_val. The first row has no
# predecessor, so both of its differences are set to 0.
data1 <- data %>%
  mutate(
    diff_val = c(0, diff(value)),
    diff_sd_val = c(0, diff(sd_value))
  )
# Rows whose label is missing
NA_inds <- which(is.na(data1$label))
# Rows that already carry a label (these are the only rows labels are copied
# from, so filling one row never influences another)
non_NA_inds <- which(!is.na(data1$label))
# For every row with a missing label
for (i in NA_inds) {
  # Labelled rows before and after row i, both in increasing order
  before <- non_NA_inds[non_NA_inds < i]
  after <- non_NA_inds[non_NA_inds > i]
  # Small step in both columns: the row belongs with the previous label
  use_prev <- data1$diff_sd_val[i] < 9 && data1$diff_val[i] < 50
  # Candidate source rows, preferred side first and nearest first. If the
  # preferred side has no labelled row, the other side is used instead.
  candidates <- if (use_prev) c(rev(before), after) else c(after, rev(before))
  # With no labelled row at all there is nothing to copy; the label stays NA.
  if (length(candidates) > 0L) {
    data1$label[i] <- data1$label[candidates[1L]]
  }
}
data1
# sd_value value label diff_val diff_sd_val
#1 34 383 bad 0 0
#2 33 428 bad 45 -1
#3 34 437 bad 9 1
#4 37 455 bad 18 3
#5 36 508 unable 53 -1
#6 45 509 unable 1 9
然后应用它
# Build the two difference columns one at a time (0 for the first row, which
# has no predecessor), then derive the filled-in label from them.
data %>%
  mutate(diff_val = c(0, diff(value))) %>%
  mutate(diff_sd_val = c(0, diff(sd_value))) %>%
  mutate(new_label = custom_label(label, diff_val, diff_sd_val))
# sd_value value label diff_val diff_sd_val new_label
#1 34 383 bad 0 0 bad
#2 33 428 <NA> 45 -1 bad
#3 34 437 <NA> 9 1 bad
#4 37 455 <NA> 18 3 bad
#5 36 508 <NA> 53 -1 unable
#6 45 509 unable 1 9 unable
谢谢你的解决方案。另外,我实际上需要在分组数据中执行此操作。你能稍微调整一下你的解决方案,使它可以这样使用吗:mutate(new_label = custom_label(value, sd_value, label))?
@Alexander 当然可以。我已经更新了答案,请告诉我它是否对你有效。—— 感谢 Ronak 的快速回复。如果需要的话,我想我可以在函数中添加更多条件。谢谢。
#' Fill missing labels from the nearest labelled row
#'
#' For every element of `label` that is `NA`, the row-to-row differences of
#' `x` and `y` decide which neighbouring label is copied: when the difference
#' in `x` is below 9 and the difference in `y` is below 50, the nearest
#' labelled row before it is used; otherwise the nearest labelled row after
#' it. If the preferred side has no labelled row, the other side is used.
#'
#' @param x Numeric vector whose successive differences are compared with 9
#'   (called with `sd_value`).
#' @param y Numeric vector whose successive differences are compared with 50
#'   (called with `value`).
#' @param label Vector of labels, `NA` where the label is missing. Must have
#'   the same length as `x` and `y`.
#' @return `label` with missing entries filled. Entries stay `NA` when no
#'   label exists at all, or when the differences for that row are `NA` and
#'   labelled rows exist on both sides.
custom_labelling <- function(x, y, label) {
  n <- length(label)
  stopifnot(length(x) == n, length(y) == n)
  known <- !is.na(label)
  # Nothing to fill, or nothing to fill from.
  if (n == 0L || all(known) || !any(known)) {
    return(label)
  }
  # The first row has no predecessor; 0 (rather than NA) keeps the comparison
  # below well-defined for it.
  diff_sd_val <- c(0, diff(x))
  diff_val <- c(0, diff(y))
  idx <- seq_len(n)
  # Index of the nearest labelled row at or before each position (0 if none)
  prev_ind <- cummax(ifelse(known, idx, 0L))
  # Index of the nearest labelled row at or after each position (n + 1 if none)
  next_ind <- rev(cummin(rev(ifelse(known, idx, n + 1L))))
  has_prev <- prev_ind > 0L
  has_next <- next_ind <= n
  small_step <- diff_sd_val < 9 & diff_val < 50
  # Take the previous label on a small step, or whenever there is no next one.
  use_prev <- has_prev & (small_step | !has_next)
  source_ind <- ifelse(use_prev, prev_ind, next_ind)
  # Only rows that were missing are overwritten; existing labels are kept.
  missing_rows <- which(!known)
  label[missing_rows] <- label[source_ind[missing_rows]]
  label
}
# Add both difference columns (0 for the first row) and the filled-in label in
# one step; custom_labelling() works from the raw sd_value/value columns.
data %>%
  mutate(
    diff_val = c(0, diff(value)),
    diff_sd_val = c(0, diff(sd_value)),
    custom_label = custom_labelling(sd_value, value, label)
  )
library(dplyr)
# Create a new data frame with diff_val and diff_sd_val. The first row has no
# predecessor, so both of its differences are set to 0.
data1 <- data %>%
  mutate(
    diff_val = c(0, diff(value)),
    diff_sd_val = c(0, diff(sd_value))
  )
# Rows whose label is missing
NA_inds <- which(is.na(data1$label))
# Rows that already carry a label (these are the only rows labels are copied
# from, so filling one row never influences another)
non_NA_inds <- which(!is.na(data1$label))
# For every row with a missing label
for (i in NA_inds) {
  # Labelled rows before and after row i, both in increasing order
  before <- non_NA_inds[non_NA_inds < i]
  after <- non_NA_inds[non_NA_inds > i]
  # Small step in both columns: the row belongs with the previous label
  use_prev <- data1$diff_sd_val[i] < 9 && data1$diff_val[i] < 50
  # Candidate source rows, preferred side first and nearest first. If the
  # preferred side has no labelled row, the other side is used instead.
  candidates <- if (use_prev) c(rev(before), after) else c(after, rev(before))
  # With no labelled row at all there is nothing to copy; the label stays NA.
  if (length(candidates) > 0L) {
    data1$label[i] <- data1$label[candidates[1L]]
  }
}
data1
# sd_value value label diff_val diff_sd_val
#1 34 383 bad 0 0
#2 33 428 bad 45 -1
#3 34 437 bad 9 1
#4 37 455 bad 18 3
#5 36 508 unable 53 -1
#6 45 509 unable 1 9
#' Fill missing labels from the nearest labelled row
#'
#' For every `NA` in `label`, the differences at that position decide which
#' neighbouring label is copied: if `diff_sd_val` is below `max_diff_sd_val`
#' and `diff_val` is below `max_diff_val`, the nearest labelled row before it
#' is used; otherwise the nearest labelled row after it. If the preferred side
#' has no labelled row, the other side is used; if there is no labelled row at
#' all, the entry stays `NA`.
#'
#' @param label Vector of labels, `NA` where the label is missing.
#' @param diff_val Numeric vector, same length as `label`: difference of the
#'   value column to the previous row.
#' @param diff_sd_val Numeric vector, same length as `label`: difference of
#'   the sd column to the previous row.
#' @param max_diff_val Exclusive upper bound on `diff_val` for taking the
#'   previous label. Defaults to 50.
#' @param max_diff_sd_val Exclusive upper bound on `diff_sd_val` for taking
#'   the previous label. Defaults to 9.
#' @return A vector like `label` with missing entries filled.
custom_label <- function(label, diff_val, diff_sd_val,
                         max_diff_val = 50, max_diff_sd_val = 9) {
  NA_inds <- which(is.na(label))
  non_NA_inds <- which(!is.na(label))
  new_label <- label
  for (i in NA_inds) {
    # Labelled positions before and after i, both in increasing order.
    before <- non_NA_inds[non_NA_inds < i]
    after <- non_NA_inds[non_NA_inds > i]
    use_prev <- diff_sd_val[i] < max_diff_sd_val && diff_val[i] < max_diff_val
    # Preferred side first, nearest first; the other side is the fallback.
    candidates <- if (use_prev) c(rev(before), after) else c(after, rev(before))
    if (length(candidates) > 0L) {
      # Always read from the original labels so fills do not cascade.
      new_label[i] <- label[candidates[1L]]
    }
  }
  new_label
}
# First attach the row-to-row differences (0 for the first row), then use them
# to fill the missing labels into a separate new_label column.
data %>%
  mutate(
    diff_val = c(0, diff(value)),
    diff_sd_val = c(0, diff(sd_value))
  ) %>%
  mutate(new_label = custom_label(label, diff_val, diff_sd_val))
# sd_value value label diff_val diff_sd_val new_label
#1 34 383 bad 0 0 bad
#2 33 428 <NA> 45 -1 bad
#3 34 437 <NA> 9 1 bad
#4 37 455 <NA> 18 3 bad
#5 36 508 <NA> 53 -1 unable
#6 45 509 unable 1 9 unable
# Grouped variant: differences and label filling are computed separately
# within each group, so the first row of every group gets a difference of 0
# and labels are never carried across group boundaries.
# Requires `data` to contain a grouping column named `group`.
data %>%
  group_by(group) %>%
  mutate(
    diff_val = c(0, diff(value)),
    diff_sd_val = c(0, diff(sd_value)),
    new_label = custom_label(label, diff_val, diff_sd_val)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()