R:转换为数值类型后,值发生了变化
标签:r,transform。原始数据如下:
chr pos ref alt region gene col1 col2 col3 col4 col5 col6
chr11 10000117 G A intronic SBF2 0.28 0.2813 . 0.008683 0.0157 2.091
chr11 100001537 T C intronic CNTN5 . . . . -0.1877 1.202
chr11 100002012 A G intronic CNTN5 1.0 0.7227 . 0.764062 -0.4256 1.584
chr11 10000210 G C intronic SBF2 0.28 0.2813 . 0.222606 -0.3470 0.179
当我尝试运行下面的代码时:
# Attempted filter on `data`: the condition is col1 >= 0.5 OR col1 == ".".
# NOTE(review): the second condition reads `t$col1`, but no object `t` is
# defined in this snippet -- presumably `data$col1` was intended; confirm.
# NOTE(review): the result is stored under the name `filter`, the same name as
# the function being called.
filter <- filter(data,data$col1>=0.5 | t$col1 == ".")
我不知道为什么这些值都改变了,有谁能帮忙解决这个问题吗?
谢谢。
首先将值从 "." 转换为 NA:
# Locate every "." placeholder in the selected columns, then blank those
# cells out as NA.
is_dot <- df[cols.num] == "."
df[cols.num][is_dot] <- NA
数据
# Example data: four variants on chr11. col1-col4 are factors whose labels are
# numbers written as text, with "." standing in for a missing value; col5 and
# col6 are already numeric.
df <- data.frame(
  chr = factor(rep("chr11", 4L)),
  pos = c(10000117L, 100001537L, 100002012L, 10000210L),
  ref = factor(c("G", "T", "A", "G"), levels = c("A", "G", "T")),
  alt = factor(c("A", "C", "G", "C"), levels = c("A", "C", "G")),
  region = factor(rep("intronic", 4L)),
  gene = factor(
    c("SBF2", "CNTN5", "CNTN5", "SBF2"),
    levels = c("CNTN5", "SBF2")
  ),
  col1 = factor(
    c("0.28", ".", "1.0", "0.28"),
    levels = c(".", "0.28", "1.0")
  ),
  col2 = factor(
    c("0.2813", ".", "0.7227", "0.2813"),
    levels = c(".", "0.2813", "0.7227")
  ),
  col3 = factor(rep(".", 4L)),
  col4 = factor(
    c("0.008683", ".", "0.764062", "0.222606"),
    levels = c(".", "0.008683", "0.222606", "0.764062")
  ),
  col5 = c(0.0157, -0.1877, -0.4256, -0.347),
  col6 = c(2.091, 1.202, 1.584, 0.179)
)
df 在问题出现之前解决问题:myData。另外,请避免使用函数名(例如 `filter`)作为变量名。
# Columns holding numbers stored as factors, with "." marking a missing value.
# Defined here so the snippet runs on its own; the str() output below shows
# these are col1..col4.
cols.num <- paste0("col", 1:4)

# Turn the "." placeholder into NA first, so the conversion below does not
# have to coerce a non-numeric label.
df[cols.num][df[cols.num] == "."] <- NA
# Go through as.character(): calling as.numeric() directly on a factor returns
# its internal level codes (1, 2, 3, ...), not the numbers shown in the labels.
df[cols.num] <- lapply(df[cols.num], function(x) as.numeric(as.character(x)))
df
#chr pos ref alt region gene col1 col2 col3 col4 col5 col6
#1 chr11 10000117 G A intronic SBF2 0.28 0.2813 NA 0.008683 0.0157 2.091
#2 chr11 100001537 T C intronic CNTN5 NA NA NA NA -0.1877 1.202
#3 chr11 100002012 A G intronic CNTN5 1.00 0.7227 NA 0.764062 -0.4256 1.584
#4 chr11 10000210 G C intronic SBF2 0.28 0.2813 NA 0.222606 -0.3470 0.179
str(df[cols.num])
#'data.frame': 4 obs. of 4 variables:
# $ col1: num 0.28 NA 1 0.28
# $ col2: num 0.281 NA 0.723 0.281
# $ col3: num NA NA NA NA
# $ col4: num 0.00868 NA 0.76406 0.22261
# Comparing NA yields NA, and an NA in a logical row index produces an all-NA
# row, so missing values are excluded explicitly.
df[df$col1 > 0.5 & !is.na(df$col1), ]
# chr pos ref alt region gene col1 col2 col3 col4 col5 col6
#3 chr11 100002012 A G intronic CNTN5 1 0.7227 NA 0.764062 -0.4256 1.584
# Example data: four variants on chr11. col1-col4 are factors whose labels are
# numbers written as text, with "." standing in for a missing value; col5 and
# col6 are already numeric.
df <- data.frame(
  chr = factor(rep("chr11", 4L)),
  pos = c(10000117L, 100001537L, 100002012L, 10000210L),
  ref = factor(c("G", "T", "A", "G"), levels = c("A", "G", "T")),
  alt = factor(c("A", "C", "G", "C"), levels = c("A", "C", "G")),
  region = factor(rep("intronic", 4L)),
  gene = factor(
    c("SBF2", "CNTN5", "CNTN5", "SBF2"),
    levels = c("CNTN5", "SBF2")
  ),
  col1 = factor(
    c("0.28", ".", "1.0", "0.28"),
    levels = c(".", "0.28", "1.0")
  ),
  col2 = factor(
    c("0.2813", ".", "0.7227", "0.2813"),
    levels = c(".", "0.2813", "0.7227")
  ),
  col3 = factor(rep(".", 4L)),
  col4 = factor(
    c("0.008683", ".", "0.764062", "0.222606"),
    levels = c(".", "0.008683", "0.222606", "0.764062")
  ),
  col5 = c(0.0157, -0.1877, -0.4256, -0.347),
  col6 = c(2.091, 1.202, 1.584, 0.179)
)