R 转换为数字类型后更改的值

R 转换为数字类型后更改的值,r,transform,R,Transform,原始数据 chr pos ref alt region gene co1l col2 col3 col4 col5 col6 chr11 10000117 G A intronic SBF2 0.28 0.2813 . 0.008683 0.0157 2.091 chr11 100001537 T C intronic CNTN5 . .

原始数据

chr     pos         ref alt region      gene    col1    col2    col3 col4       col5    col6
chr11   10000117    G   A   intronic    SBF2    0.28    0.2813  .   0.008683    0.0157  2.091
chr11   100001537   T   C   intronic    CNTN5   .        .      .     .         -0.1877 1.202
chr11   100002012   A   G   intronic    CNTN5   1.0     0.7227  .   0.764062    -0.4256 1.584
chr11   10000210    G   C   intronic    SBF2    0.28    0.2813  .   0.222606    -0.3470 0.179
当我尝试运行以下代码时:

# NOTE(review): `t` is not defined in this snippet (in base R, `t` is the
# transpose function) -- presumably `data$col1` was intended. The result is also
# assigned to `filter`, the same name as the function being called.
filter <- filter(data,data$col1>=0.5 | t$col1 == ".")
我不知道为什么这些值都发生了改变,有谁能帮忙解决这个问题吗?


谢谢

首先将值 `.` 转换为 `NA`:

# Replace the "." placeholder with NA in the columns named by cols.num.
# NOTE(review): cols.num is not defined in this snippet -- presumably a character
# vector naming the columns to convert (col1..col4, per the str() output); confirm.
df[cols.num][df[cols.num] == "."] <- NA
数据

# Example data. The "." entries stand for missing values, which is why
# col1-col4 are stored as factors rather than numbers.
df <- data.frame(
  chr = factor(rep("chr11", 4L)),
  pos = c(10000117L, 100001537L, 100002012L, 10000210L),
  ref = factor(c("G", "T", "A", "G"), levels = c("A", "G", "T")),
  alt = factor(c("A", "C", "G", "C"), levels = c("A", "C", "G")),
  region = factor(rep("intronic", 4L)),
  gene = factor(
    c("SBF2", "CNTN5", "CNTN5", "SBF2"),
    levels = c("CNTN5", "SBF2")
  ),
  col1 = factor(
    c("0.28", ".", "1.0", "0.28"),
    levels = c(".", "0.28", "1.0")
  ),
  col2 = factor(
    c("0.2813", ".", "0.7227", "0.2813"),
    levels = c(".", "0.2813", "0.7227")
  ),
  col3 = factor(rep(".", 4L)),
  col4 = factor(
    c("0.008683", ".", "0.764062", "0.222606"),
    levels = c(".", "0.008683", "0.222606", "0.764062")
  ),
  col5 = c(0.0157, -0.1877, -0.4256, -0.347),
  col6 = c(2.091, 1.202, 1.584, 0.179)
)

在问题出现之前就解决它(以下针对数据框 `df`):
请避免使用函数名(例如 `filter`)作为变量名,可改用 `myData` 之类的名称。
# Columns that hold numbers but were stored as factors because "." marks a
# missing value.
cols.num <- c("col1", "col2", "col3", "col4")

# Turn the "." placeholder into a real missing value first, so the numeric
# conversion below does not have to coerce a non-numeric string.
df[cols.num][df[cols.num] == "."] <- NA

# Convert through character: calling as.numeric() directly on a factor returns
# its internal integer codes, not the values shown in the labels.
df[cols.num] <- lapply(df[cols.num], function(x) as.numeric(as.character(x)))
df
#chr       pos ref alt   region  gene col1   col2 col3     col4    col5  col6
#1 chr11  10000117   G   A intronic  SBF2 0.28 0.2813   NA 0.008683  0.0157 2.091
#2 chr11 100001537   T   C intronic CNTN5   NA     NA   NA       NA -0.1877 1.202
#3 chr11 100002012   A   G intronic CNTN5 1.00 0.7227   NA 0.764062 -0.4256 1.584
#4 chr11  10000210   G   C intronic  SBF2 0.28 0.2813   NA 0.222606 -0.3470 0.179

# Confirm that the converted columns are now numeric.
str(df[cols.num])
#'data.frame':  4 obs. of  4 variables:
# $ col1: num  0.28 NA 1 0.28
# $ col2: num  0.281 NA 0.723 0.281
# $ col3: num  NA NA NA NA
# $ col4: num  0.00868 NA 0.76406 0.22261
# Keep the rows whose col1 exceeds 0.5; which() returns only the positions that
# are TRUE, so rows where col1 is NA are left out.
df[which(df$col1 > 0.5), ]
#    chr       pos ref alt   region  gene col1   col2 col3     col4    col5  col6
#3 chr11 100002012   A   G intronic CNTN5    1 0.7227   NA 0.764062 -0.4256 1.584
# Example data. The "." entries stand for missing values, which is why
# col1-col4 are stored as factors rather than numbers.
df <- data.frame(
  chr = factor(rep("chr11", 4L)),
  pos = c(10000117L, 100001537L, 100002012L, 10000210L),
  ref = factor(c("G", "T", "A", "G"), levels = c("A", "G", "T")),
  alt = factor(c("A", "C", "G", "C"), levels = c("A", "C", "G")),
  region = factor(rep("intronic", 4L)),
  gene = factor(
    c("SBF2", "CNTN5", "CNTN5", "SBF2"),
    levels = c("CNTN5", "SBF2")
  ),
  col1 = factor(
    c("0.28", ".", "1.0", "0.28"),
    levels = c(".", "0.28", "1.0")
  ),
  col2 = factor(
    c("0.2813", ".", "0.7227", "0.2813"),
    levels = c(".", "0.2813", "0.7227")
  ),
  col3 = factor(rep(".", 4L)),
  col4 = factor(
    c("0.008683", ".", "0.764062", "0.222606"),
    levels = c(".", "0.008683", "0.222606", "0.764062")
  ),
  col5 = c(0.0157, -0.1877, -0.4256, -0.347),
  col6 = c(2.091, 1.202, 1.584, 0.179)
)