如何使用for循环获取每个列的异常值?
我有571列对应于571种不同的代谢物,还有一列带有参与者ID。使用下面的代码,我可以分别识别每个代谢物有异常值的参与者(平均值+/-3*sd) 我正在处理for循环,但一直遇到错误。任何帮助都将不胜感激如何使用for循环获取每个列的异常值?,r,for-loop,R,For Loop,我有571列对应于571种不同的代谢物,还有一列带有参与者ID。使用下面的代码,我可以分别识别每个代谢物有异常值的参与者(平均值+/-3*sd) 我正在处理for循环,但一直遇到错误。任何帮助都将不胜感激 for ( i in 1:ncol(data[, 2:572]){ outlier1 <-data[,i] > mean(data[,i]) + 3*sd(data[, i]) outlier2 <- data[,i] > mean(data[,i]) - 3
## Asker's original attempt, kept verbatim; it does not parse as written.
## NOTE(review): the `for (` header is never closed before `{`, and a stray
## `)` follows the closing brace on the last line.
## NOTE(review): `i` runs over 1..571 (the width of data[, 2:572]) but is used
## to index `data` directly, so column 1 is visited and column 572 is not.
for ( i in 1:ncol(data[, 2:572]){
## Flags values above mean + 3 * sd of column i.
outlier1 <-data[,i] > mean(data[,i]) + 3*sd(data[, i])
## NOTE(review): compares with `>` against the lower bound; for a
## "mean +/- 3 * sd" rule `<` was presumably intended.
outlier2 <- data[,i] > mean(data[,i]) - 3*sd(data[,i])
## Adds the two logical vectors (0/1/2 per row); `need` is overwritten on
## every iteration, so only the last column's result would remain.
need <- outlier1 + outlier2
})
for(1中的i:ncol(数据[,2:572]){
异常值1平均值(数据[,i])+3*sd(数据[,i])
离群值2平均值(数据[,i])-3*sd(数据[,i])
need
这里有一个好方法:构建一个自定义函数,然后将其应用于您的列:
## custom function
## show_outliers(): keep only the outlying values of a numeric vector.
##
## A value is an outlier when it lies more than `n_sd` standard deviations
## above or below the mean of `x`. Every other element is replaced by NA,
## so the result has the same length as `x` and only outliers stay visible.
##
## Arguments:
##   x      numeric vector (one metabolite column).
##   n_sd   number of standard deviations used as the cut-off (default 3).
##   na.rm  drop missing values when computing mean and sd (default TRUE).
##          Without this a single NA makes both statistics NA and no
##          element can be classified.
## Returns: a vector like `x` with all non-outliers set to NA.
show_outliers <- function(x, n_sd = 3, na.rm = TRUE) {
  my_mean <- mean(x, na.rm = na.rm)
  my_sd <- sd(x, na.rm = na.rm)
  is_outlier <- (x > my_mean + n_sd * my_sd) | (x < my_mean - n_sd * my_sd)
  # An NA comparison (missing value, or sd undefined for a single
  # observation) must count as "not an outlier". `%in% TRUE` maps NA to
  # FALSE; a bare NA in a logical subscript would skip the assignment and
  # leave the original value in place.
  x[!(is_outlier %in% TRUE)] <- NA
  x
}
## initialize the output as a copy of the input
out <- data
## run show_outliers() over every column except the first and write the
## results back into the same positions
out[-1] <- lapply(X = out[-1], FUN = show_outliers)
##自定义函数
显示异常值=函数(x){
我的平均数=平均数(x)
my_sd=sd(x)
is_outlier=(x>my_mean+3*my_sd)|(x
如果您想修复for循环,我认为这会起作用:
## Work on a copy (`out`) so the original `data` is left untouched.
out <- data
## Visit every column except the first. seq_len(ncol(out))[-1] is empty when
## there is only one column, whereas 2:ncol(out) would count backwards (2, 1).
for (i in seq_len(ncol(out))[-1]) {
  ## [[i]] extracts the column as a plain vector for data.frames and tibbles
  column <- out[[i]]
  column_mean <- mean(column, na.rm = TRUE)
  column_sd <- sd(column, na.rm = TRUE)
  ## above the upper bound ...
  outlier1 <- column > column_mean + 3 * column_sd
  ## ... or below the lower bound (note `<`, not `>`)
  outlier2 <- column < column_mean - 3 * column_sd
  ## outlier1 and outlier2 are logical, so combine them elementwise with OR |
  outliers <- outlier1 | outlier2
  ## Blank out everything that is not an outlier. `%in% TRUE` treats NA
  ## comparisons as "not an outlier" so missing values are blanked as well.
  out[!(outliers %in% TRUE), i] <- NA
} # the loop ends with a single closing brace, no trailing `)`
out
我们可以使用 boxplot.stats 查找异常值:
library(dplyr)
## Replace, in every column except the first, the values that
## boxplot.stats() reports as outliers (its `$out` component) with NA.
data1 <- mutate(
  data,
  across(
    -1,
    function(col) {
      flagged <- col %in% boxplot.stats(col)$out
      replace(col, flagged, NA)
    }
  )
)
库(dplyr)
数据1%
突变(跨(-1,
~replace(.%in%boxplot.stats(.%$out,NA)))
## Work on a copy (`out`) so the original `data` is left untouched.
out <- data
## Visit every column except the first. seq_len(ncol(out))[-1] is empty when
## there is only one column, whereas 2:ncol(out) would count backwards (2, 1).
for (i in seq_len(ncol(out))[-1]) {
  ## [[i]] extracts the column as a plain vector for data.frames and tibbles
  column <- out[[i]]
  column_mean <- mean(column, na.rm = TRUE)
  column_sd <- sd(column, na.rm = TRUE)
  ## above the upper bound ...
  outlier1 <- column > column_mean + 3 * column_sd
  ## ... or below the lower bound (note `<`, not `>`)
  outlier2 <- column < column_mean - 3 * column_sd
  ## outlier1 and outlier2 are logical, so combine them elementwise with OR |
  outliers <- outlier1 | outlier2
  ## Blank out everything that is not an outlier. `%in% TRUE` treats NA
  ## comparisons as "not an outlier" so missing values are blanked as well.
  out[!(outliers %in% TRUE), i] <- NA
} # the loop ends with a single closing brace, no trailing `)`
library(dplyr)
## Replace, in every column except the first, the values that
## boxplot.stats() reports as outliers (its `$out` component) with NA.
data1 <- mutate(
  data,
  across(
    -1,
    function(col) {
      flagged <- col %in% boxplot.stats(col)$out
      replace(col, flagged, NA)
    }
  )
)