R:如何将以冒号(双点)分隔的值转换为对应的含义
我有这样的数据:
# Example input (dput-style literal): a data frame with 26 rows and a single
# factor column named `df`. Each value is a colon-separated code string drawn
# from the 10 levels listed in `.Label`; some pieces contain "|" as well.
df<- structure(list(df = structure(c(10L, 8L, 2L, 8L, 7L, 7L, 10L,
8L, 3L, 10L, 10L, 9L, 9L, 1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L,
1L, 1L, 2L, 6L), .Label = c("-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01",
"1:1(2):1", "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
"2:-1:-1", "2:-1:2", "2:02:02"), class = "factor")), class = "data.frame", row.names = c(NA,
-26L))
因此,预期输出如下所示
2:02:02 Homo Homo Homo
2:-1:-1 Homo No No
-1:2:-1 No Homo No
2:-1:-1 Homo No No
1(1)|1(2):2:2 Het1 Het2 Homo Homo
1(1)|1(2):2:2 Het1 Het2 Homo Homo
2:02:02 Homo Homo Homo
2:-1:-1 Homo No No
-1:2:2 No Homo Homo
2:02:02 Homo Homo Homo
2:02:02 Homo Homo Homo
2:-1:2 Homo No Homo
2:-1:2 Homo No Homo
-1:-1:2 No No Homo
-1:-1:2 No No Homo
-1:2:2 No Homo Homo
-1:-1:2 No No Homo
1:1(2):1 Het Het2 Het
1:1(2):1 Het Het3 Het
1:01:01 Het Het Het
2:02:02 Homo Homo Homo
2:-1:-1 Homo No No
-1:-1:2 No No Homo
-1:-1:2 No No Homo
-1:2:-1 No Homo No
1(1)|1(2):1(1)|1(2):1(1)|1(2) Het1 Het2 Het1 Het2 Het1 Het2
不确定结果是否正是您所需要的,但这可能会有所帮助。我认为这也许不是最高效、最优雅的解决方案,但它可以作为一个起点。另外,我把您的数据命名为 dats:
head(dats)
df
1 2:02:02
2 2:-1:-1
3 -1:2:-1
4 2:-1:-1
5 1(1)|1(2):2:2
6 1(1)|1(2):2:2
我创建了一个映射 data.frame:
mapping
id value
1 2 Homo
2 -1 No
3 1 Het
4 1(1) Het1
5 1(2) Het2
首先,我使用 stringr::str_split_fixed() 按冒号进行拆分:
# stringr provides str_split_fixed(), which is used for the splitting below.
library(stringr)
# Split every string in dats$df on ":" into exactly three pieces (one matrix
# column per piece), then convert that matrix into a data frame.
double_point <- as.data.frame.matrix(str_split_fixed(dats$df, ":", 3))
现在,我们必须用映射替换这些值,并将它们与拆分后的原始数据(本例中为 df_1)绑定在一起:
您可以在 num2words 数据框中显式定义所有可能的值,然后运行以下代码:
# Example input (dput-style literal): a data frame with 26 rows and a single
# factor column named `df`. Each value is a colon-separated code string drawn
# from the 10 levels listed in `.Label`.
df <- structure(list(df = structure(c(10L, 8L, 2L, 8L, 7L, 7L, 10L,
8L, 3L, 10L, 10L, 9L, 9L, 1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L,
1L, 1L, 2L, 6L), .Label = c("-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01",
"1:1(2):1", "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
"2:-1:-1", "2:-1:2", "2:02:02"), class = "factor")), class = "data.frame", row.names = c(NA,
-26L))

# Lookup table from each raw token to its label. Tokens are matched as exact
# strings, so zero-padded spellings ("02", "01") and both orderings of the
# combined "a|b" token are listed explicitly.
# colClasses = "character" guarantees `num` is never parsed as a number, which
# would silently turn "02" into 2 and break the exact-string match.
num2words <- read.table(text = "
num word
2 Homo
02 Homo
-1 No
1 Het
01 Het
1(1) Het1
1(2) Het2
1(1)|1(2) Het1-Het2
1(2)|1(1) Het2-Het1
", header = TRUE, stringsAsFactors = FALSE, colClasses = "character")

# Split all code strings on ":" in a single vectorised call (no 1:nrow() index
# loop, so an empty data frame is handled), then translate each token through
# the lookup table. Tokens missing from num2words come back as NA.
lst <- lapply(
  strsplit(as.character(df[[1L]]), ":", fixed = TRUE),
  function(split.nums) {
    num2words$word[match(split.nums, num2words$num)]
  }
)

# One row per input string, one column per colon-separated position, bound
# next to the original column.
new.df <- cbind(df, do.call(rbind, lst))
> new.df
df 1 2 3
1 2:02:02 Homo Homo Homo
2 2:-1:-1 Homo No No
3 -1:2:-1 No Homo No
4 2:-1:-1 Homo No No
5 1(1)|1(2):2:2 Het1-Het2 Homo Homo
6 1(1)|1(2):2:2 Het1-Het2 Homo Homo
7 2:02:02 Homo Homo Homo
8 2:-1:-1 Homo No No
9 -1:2:2 No Homo Homo
10 2:02:02 Homo Homo Homo
11 2:02:02 Homo Homo Homo
12 2:-1:2 Homo No Homo
13 2:-1:2 Homo No Homo
14 -1:-1:2 No No Homo
15 -1:-1:2 No No Homo
16 -1:2:2 No Homo Homo
17 -1:-1:2 No No Homo
18 1:1(2):1 Het Het2 Het
19 1:1(2):1 Het Het2 Het
20 1:01:01 Het Het Het
21 2:02:02 Homo Homo Homo
22 2:-1:-1 Homo No No
23 -1:-1:2 No No Homo
24 -1:-1:2 No No Homo
25 -1:2:-1 No Homo No
26 1(1)|1(2):1(1)|1(2):1(1)|1(2) Het1-Het2 Het1-Het2 Het1-Het2
您说的“double points”是指冒号吗?这是某个地区的说法吗?我在美国从没听过。@camille 在葡萄牙语中,冒号叫“dois pontos”,意思是“两点”。@Camille 我的意思是:02 和 2 会匹配到同一个字符串吗?@akrun 是的,02 和 2 是同一个字符串;您的代码没有输出 1(1) 或 1(2) 对应的结果。您能给我看一下 mapping 的 str() 输出吗?您好,我已经更新了回答。看起来在最后一个输出的最后一行中,您提到的情况已经能够输出了。
# Split each colon-separated column on "|" into exactly two pieces: a combined
# token such as "1(1)|1(2)" fills both cells, while a single token is followed
# by an empty string.
# seq_len() (instead of 1:ncol()) makes a zero-column input a no-op rather than
# iterating over c(1, 0), and lapply() builds the list without growing it
# element by element.
listed <- lapply(seq_len(ncol(double_point)), function(i) {
  str_split_fixed(double_point[, i], "\\|", 2)
})
# put as data frame
# (cbind of the per-column pieces: two result columns per input column)
df_ <- do.call(cbind, listed)
# this is going to help in the future
# (df_1 keeps the raw split tokens, while df_ is normalised afterwards)
df_1 <- df_
# result till now:
head(df_1)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "2" "" "02" "" "02" ""
[2,] "2" "" "-1" "" "-1" ""
[3,] "-1" "" "2" "" "-1" ""
[4,] "2" "" "-1" "" "-1" ""
[5,] "1(1)" "1(2)" "2" "" "2" ""
[6,] "1(1)" "1(2)" "2" "" "2" ""
# Translate every token column of df_ through the `mapping` lookup table and
# bind the labels next to the raw split tokens kept in df_1.
listed <- vector("list", ncol(df_))  # preallocated: one slot per token column
for (i in seq_len(ncol(df_))) {
  # Normalise zero-padded tokens ("02" -> "2", "-01" -> "-1") so they match
  # mapping$id. Only LEADING zeros are stripped: deleting every "0" would also
  # corrupt tokens such as "10" (-> "1") or "1(10)" (-> "1(1)").
  df_[, i] <- sub("^(-?)0+(?=[0-9])", "\\1", df_[, i], perl = TRUE)
  # Look up each token's label; tokens absent from mapping$id (including the
  # empty padding cells) yield NA. drop = FALSE keeps a one-column data frame
  # so the pieces can be cbind-ed below.
  listed[[i]] <- mapping[match(df_[, i], mapping$id), 2, drop = FALSE]
}
df_final <- cbind(df_1, do.call(cbind, listed))
head(df_final)
1 2 3 4 5 6 value value value value value value
1 2 02 02 Homo <NA> Homo <NA> Homo <NA>
1.1 2 -1 -1 Homo <NA> No <NA> No <NA>
2 -1 2 -1 No <NA> Homo <NA> No <NA>
1.2 2 -1 -1 Homo <NA> No <NA> No <NA>
4 1(1) 1(2) 2 2 Het1 Het2 Homo <NA> Homo <NA>
4.1 1(1) 1(2) 2 2 Het1 Het2 Homo <NA> Homo <NA>
dput(mapping)
structure(list(id = structure(c(5L, 1L, 2L, 3L, 4L), .Label = c("-1",
"1", "1(1)", "1(2)", "2"), class = "factor"), value = structure(c(4L,
5L, 1L, 2L, 3L), .Label = c("Het", "Het1", "Het2", "Homo", "No"
), class = "factor")), class = "data.frame", row.names = c("1",
"2", "3", "4", "5"))
str(mapping)
'data.frame': 5 obs. of 2 variables:
$ id : Factor w/ 5 levels "-1","1","1(1)",..: 5 1 2 3 4
$ value: Factor w/ 5 levels "Het","Het1","Het2",..: 4 5 1 2 3
# Example input (dput-style literal): a data frame with 26 rows and a single
# factor column named `df`. Each value is a colon-separated code string drawn
# from the 10 levels listed in `.Label`.
df <- structure(list(df = structure(c(10L, 8L, 2L, 8L, 7L, 7L, 10L,
8L, 3L, 10L, 10L, 9L, 9L, 1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L,
1L, 1L, 2L, 6L), .Label = c("-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01",
"1:1(2):1", "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
"2:-1:-1", "2:-1:2", "2:02:02"), class = "factor")), class = "data.frame", row.names = c(NA,
-26L))

# Lookup table from each raw token to its label. Tokens are matched as exact
# strings, so zero-padded spellings ("02", "01") and both orderings of the
# combined "a|b" token are listed explicitly.
# colClasses = "character" guarantees `num` is never parsed as a number, which
# would silently turn "02" into 2 and break the exact-string match.
num2words <- read.table(text = "
num word
2 Homo
02 Homo
-1 No
1 Het
01 Het
1(1) Het1
1(2) Het2
1(1)|1(2) Het1-Het2
1(2)|1(1) Het2-Het1
", header = TRUE, stringsAsFactors = FALSE, colClasses = "character")

# Split all code strings on ":" in a single vectorised call (no 1:nrow() index
# loop, so an empty data frame is handled), then translate each token through
# the lookup table. Tokens missing from num2words come back as NA.
lst <- lapply(
  strsplit(as.character(df[[1L]]), ":", fixed = TRUE),
  function(split.nums) {
    num2words$word[match(split.nums, num2words$num)]
  }
)

# One row per input string, one column per colon-separated position, bound
# next to the original column.
new.df <- cbind(df, do.call(rbind, lst))
> new.df
df 1 2 3
1 2:02:02 Homo Homo Homo
2 2:-1:-1 Homo No No
3 -1:2:-1 No Homo No
4 2:-1:-1 Homo No No
5 1(1)|1(2):2:2 Het1-Het2 Homo Homo
6 1(1)|1(2):2:2 Het1-Het2 Homo Homo
7 2:02:02 Homo Homo Homo
8 2:-1:-1 Homo No No
9 -1:2:2 No Homo Homo
10 2:02:02 Homo Homo Homo
11 2:02:02 Homo Homo Homo
12 2:-1:2 Homo No Homo
13 2:-1:2 Homo No Homo
14 -1:-1:2 No No Homo
15 -1:-1:2 No No Homo
16 -1:2:2 No Homo Homo
17 -1:-1:2 No No Homo
18 1:1(2):1 Het Het2 Het
19 1:1(2):1 Het Het2 Het
20 1:01:01 Het Het Het
21 2:02:02 Homo Homo Homo
22 2:-1:-1 Homo No No
23 -1:-1:2 No No Homo
24 -1:-1:2 No No Homo
25 -1:2:-1 No Homo No
26 1(1)|1(2):1(1)|1(2):1(1)|1(2) Het1-Het2 Het1-Het2 Het1-Het2