R:如何将以冒号(“两点”)分隔的值转换为对应的含义

R 如何将值转换为用冒号分隔的含义(双点),r,R,我有这样的数据 df<- structure(list(df = structure(c(10L, 8L, 2L, 8L, 7L, 7L, 10L, 8L, 3L, 10L, 10L, 9L, 9L, 1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L, 1L, 1L, 2L, 6L), .Label = c("-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01", "1:1(2):1", "1(1)|1(2):1(1)|1(2):1(

我有这样的数据

# Sample data: a one-column data frame whose factor column `df` holds 26
# colon-separated code strings drawn from 10 distinct levels.
df <- local({
  level_labels <- c(
    "-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01", "1:1(2):1",
    "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
    "2:-1:-1", "2:-1:2", "2:02:02"
  )
  level_codes <- c(
    10L, 8L, 2L, 8L, 7L, 7L, 10L, 8L, 3L, 10L, 10L, 9L, 9L,
    1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L, 1L, 1L, 2L, 6L
  )
  # Index the labels by code, then pin the level order explicitly so the
  # integer codes of the resulting factor equal `level_codes`.
  data.frame(df = factor(level_labels[level_codes], levels = level_labels))
})
因此,预期输出如下所示

2:02:02 Homo    Homo    Homo
2:-1:-1 Homo    No  No
-1:2:-1 No  Homo    No
2:-1:-1 Homo    No  No
1(1)|1(2):2:2   Het1 Het2   Homo    Homo
1(1)|1(2):2:2   Het1 Het2   Homo    Homo
2:02:02 Homo    Homo    Homo
2:-1:-1 Homo    No  No
-1:2:2  No  Homo    Homo
2:02:02 Homo    Homo    Homo
2:02:02 Homo    Homo    Homo
2:-1:2  Homo    No  Homo
2:-1:2  Homo    No  Homo
-1:-1:2 No  No  Homo
-1:-1:2 No  No  Homo
-1:2:2  No  Homo    Homo
-1:-1:2 No  No  Homo
1:1(2):1    Het Het2    Het
1:1(2):1    Het Het2    Het
1:01:01 Het Het Het
2:02:02 Homo    Homo    Homo
2:-1:-1 Homo    No  No
-1:-1:2 No  No  Homo
-1:-1:2 No  No  Homo
-1:2:-1 No  Homo    No
1(1)|1(2):1(1)|1(2):1(1)|1(2)   Het1 Het2   Het1 Het2   Het1 Het2 

不确定结果是否正是您所需要的,但这可能会有所帮助。 我认为这也许不是最有效、最漂亮的解决方案,但它可以作为一个起点

另外,我把您的数据命名为 dats:

head(dats)
                              df
1                        2:02:02
2                        2:-1:-1
3                        -1:2:-1
4                        2:-1:-1
5                  1(1)|1(2):2:2
6                  1(1)|1(2):2:2
我创建了一个用于映射的 data.frame(mapping):

mapping
    id value
1    2  Homo
2   -1    No
3    1   Het
4 1(1)  Het1
5 1(2)  Het2
首先,我使用 stringr::str_split_fixed() 按冒号(“两点”)进行拆分:

library(stringr)
# Split every entry of dats$df at ":" into three pieces and keep the
# resulting character matrix as a data frame, one column per piece.
double_point <- as.data.frame(
  str_split_fixed(dats$df, pattern = ":", n = 3)
)
现在,我们必须用映射替换这些值,并用拆分的原始数据绑定它们(在本例中):


您可以在 num2words 数据框中显式定义所有可能的值,然后运行以下代码:

# Example input: a data frame with a single factor column `df` holding 26
# colon-separated code strings such as "2:02:02" or "1(1)|1(2):2:2".
df <- structure(
  list(df = structure(
    c(
      10L, 8L, 2L, 8L, 7L, 7L, 10L, 8L, 3L, 10L, 10L, 9L, 9L,
      1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L, 1L, 1L, 2L, 6L
    ),
    .Label = c(
      "-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01", "1:1(2):1",
      "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
      "2:-1:-1", "2:-1:2", "2:02:02"
    ),
    class = "factor"
  )),
  class = "data.frame",
  row.names = c(NA, -26L)
)

# Lookup table: every code that can appear between two colons (`num`) and the
# word it stands for (`word`). Zero-padded variants ("02", "01") and the
# "|"-combined codes are listed explicitly because matching below is exact.
# colClasses forces both columns to character so that codes like "02" are
# never converted to numbers, whatever the table happens to contain.
num2words <- read.table(text = "
                        num word
                        2 Homo
                        02  Homo
                        -1  No
                        1 Het
                        01  Het                        
                        1(1)  Het1
                        1(2)  Het2
                        1(1)|1(2) Het1-Het2
                        1(2)|1(1) Het2-Het1
                        ", header = TRUE, stringsAsFactors = FALSE,
  colClasses = "character"
)

# One character vector of words per row of `df`: split the whole column at
# ":" in a single vectorised call, then translate each piece through the
# lookup table. Codes missing from `num2words` become NA.
lst <- lapply(
  strsplit(as.character(df$df), ":", fixed = TRUE),
  function(split.nums) num2words$word[match(split.nums, num2words$num)]
)

# Stack the per-row vectors into a matrix and append it to the original data;
# the new columns are named "1", "2", "3".
new.df <- cbind(df, do.call(rbind, lst))

> new.df

                              df         1         2         3
1                        2:02:02      Homo      Homo      Homo
2                        2:-1:-1      Homo        No        No
3                        -1:2:-1        No      Homo        No
4                        2:-1:-1      Homo        No        No
5                  1(1)|1(2):2:2 Het1-Het2      Homo      Homo
6                  1(1)|1(2):2:2 Het1-Het2      Homo      Homo
7                        2:02:02      Homo      Homo      Homo
8                        2:-1:-1      Homo        No        No
9                         -1:2:2        No      Homo      Homo
10                       2:02:02      Homo      Homo      Homo
11                       2:02:02      Homo      Homo      Homo
12                        2:-1:2      Homo        No      Homo
13                        2:-1:2      Homo        No      Homo
14                       -1:-1:2        No        No      Homo
15                       -1:-1:2        No        No      Homo
16                        -1:2:2        No      Homo      Homo
17                       -1:-1:2        No        No      Homo
18                      1:1(2):1       Het      Het2       Het
19                      1:1(2):1       Het      Het2       Het
20                       1:01:01       Het       Het       Het
21                       2:02:02      Homo      Homo      Homo
22                       2:-1:-1      Homo        No        No
23                       -1:-1:2        No        No      Homo
24                       -1:-1:2        No        No      Homo
25                       -1:2:-1        No      Homo        No
26 1(1)|1(2):1(1)|1(2):1(1)|1(2) Het1-Het2 Het1-Het2 Het1-Het2

“double points”是指冒号吗?这是某个地区的说法吗?在美国从没听过这种叫法。
@camille 在葡萄牙语中,冒号叫“dois pontos”,意思是“两点”。
@Camille 我的意思是:02 和 2 会匹配到同一个字符串吗?
@akrun 是的,02 和 2 是同一个字符串;另外,您的代码没有输出 1(1) 或 1(2) 对应的值。
你能告诉我 mapping 的 str 吗?
嗨,已发布编辑。在最后一个输出的最后一行中,它似乎已经针对您提到的情况打印出了结果。
# Split every column of `double_point` once more, this time at "|", so that
# an entry such as "1(1)|1(2)" is spread over two cells. Each column yields a
# two-column character matrix. seq_len() keeps the iteration empty when
# there are no columns (1:ncol() would count 1, 0).
listed <- lapply(
  seq_len(ncol(double_point)),
  function(i) str_split_fixed(double_point[, i], "\\|", 2)
)

# Bind the two-column pieces side by side into one character matrix.
df_ <- do.call(cbind, listed)

# Keep an untouched copy of the raw split; it is bound next to the mapped
# values later on.
df_1 <- df_

# result till now:
head(df_1)
     [,1]   [,2]   [,3] [,4] [,5] [,6]
[1,] "2"    ""     "02" ""   "02" ""  
[2,] "2"    ""     "-1" ""   "-1" ""  
[3,] "-1"   ""     "2"  ""   "-1" ""  
[4,] "2"    ""     "-1" ""   "-1" ""  
[5,] "1(1)" "1(2)" "2"  ""   "2"  ""  
[6,] "1(1)" "1(2)" "2"  ""   "2"  ""
# Normalise zero-padded codes ("02" -> "2") so they match `mapping$id`.
# Only leading zeros (optionally after a minus sign) are removed; stripping
# every "0" would corrupt codes that legitimately contain one, e.g. "10".
# Assigning into df_[] keeps the matrix dimensions.
df_[] <- sub("^(-?)0+", "\\1", df_)

# For each column, look the codes up in `mapping` and keep the matching
# `value` column as a one-column data frame (drop = FALSE); codes without a
# match, including the empty padding cells, give NA.
listed <- lapply(
  seq_len(ncol(df_)),
  function(i) mapping[match(df_[, i], mapping$id), 2, drop = FALSE]
)

# Raw split pieces on the left, their translated values on the right.
df_final <- cbind(df_1, do.call(cbind, listed))
head(df_final)
       1    2  3 4  5 6 value value value value value value
1      2      02   02    Homo  <NA>  Homo  <NA>  Homo  <NA>
1.1    2      -1   -1    Homo  <NA>    No  <NA>    No  <NA>
2     -1       2   -1      No  <NA>  Homo  <NA>    No  <NA>
1.2    2      -1   -1    Homo  <NA>    No  <NA>    No  <NA>
4   1(1) 1(2)  2    2    Het1  Het2  Homo  <NA>  Homo  <NA>
4.1 1(1) 1(2)  2    2    Het1  Het2  Homo  <NA>  Homo  <NA>
dput(mapping)
structure(list(id = structure(c(5L, 1L, 2L, 3L, 4L), .Label = c("-1", 
"1", "1(1)", "1(2)", "2"), class = "factor"), value = structure(c(4L, 
5L, 1L, 2L, 3L), .Label = c("Het", "Het1", "Het2", "Homo", "No"
), class = "factor")), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5"))

str(mapping)
'data.frame':   5 obs. of  2 variables:
 $ id   : Factor w/ 5 levels "-1","1","1(1)",..: 5 1 2 3 4
 $ value: Factor w/ 5 levels "Het","Het1","Het2",..: 4 5 1 2 3
# Example input: a data frame with a single factor column `df` holding 26
# colon-separated code strings such as "2:02:02" or "1(1)|1(2):2:2".
df <- structure(
  list(df = structure(
    c(
      10L, 8L, 2L, 8L, 7L, 7L, 10L, 8L, 3L, 10L, 10L, 9L, 9L,
      1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L, 1L, 1L, 2L, 6L
    ),
    .Label = c(
      "-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01", "1:1(2):1",
      "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
      "2:-1:-1", "2:-1:2", "2:02:02"
    ),
    class = "factor"
  )),
  class = "data.frame",
  row.names = c(NA, -26L)
)

# Lookup table: every code that can appear between two colons (`num`) and the
# word it stands for (`word`). Zero-padded variants ("02", "01") and the
# "|"-combined codes are listed explicitly because matching below is exact.
# colClasses forces both columns to character so that codes like "02" are
# never converted to numbers, whatever the table happens to contain.
num2words <- read.table(text = "
                        num word
                        2 Homo
                        02  Homo
                        -1  No
                        1 Het
                        01  Het                        
                        1(1)  Het1
                        1(2)  Het2
                        1(1)|1(2) Het1-Het2
                        1(2)|1(1) Het2-Het1
                        ", header = TRUE, stringsAsFactors = FALSE,
  colClasses = "character"
)

# One character vector of words per row of `df`: split the whole column at
# ":" in a single vectorised call, then translate each piece through the
# lookup table. Codes missing from `num2words` become NA.
lst <- lapply(
  strsplit(as.character(df$df), ":", fixed = TRUE),
  function(split.nums) num2words$word[match(split.nums, num2words$num)]
)

# Stack the per-row vectors into a matrix and append it to the original data;
# the new columns are named "1", "2", "3".
new.df <- cbind(df, do.call(rbind, lst))

> new.df

                              df         1         2         3
1                        2:02:02      Homo      Homo      Homo
2                        2:-1:-1      Homo        No        No
3                        -1:2:-1        No      Homo        No
4                        2:-1:-1      Homo        No        No
5                  1(1)|1(2):2:2 Het1-Het2      Homo      Homo
6                  1(1)|1(2):2:2 Het1-Het2      Homo      Homo
7                        2:02:02      Homo      Homo      Homo
8                        2:-1:-1      Homo        No        No
9                         -1:2:2        No      Homo      Homo
10                       2:02:02      Homo      Homo      Homo
11                       2:02:02      Homo      Homo      Homo
12                        2:-1:2      Homo        No      Homo
13                        2:-1:2      Homo        No      Homo
14                       -1:-1:2        No        No      Homo
15                       -1:-1:2        No        No      Homo
16                        -1:2:2        No      Homo      Homo
17                       -1:-1:2        No        No      Homo
18                      1:1(2):1       Het      Het2       Het
19                      1:1(2):1       Het      Het2       Het
20                       1:01:01       Het       Het       Het
21                       2:02:02      Homo      Homo      Homo
22                       2:-1:-1      Homo        No        No
23                       -1:-1:2        No        No      Homo
24                       -1:-1:2        No        No      Homo
25                       -1:2:-1        No      Homo        No
26 1(1)|1(2):1(1)|1(2):1(1)|1(2) Het1-Het2 Het1-Het2 Het1-Het2