R 如何将值转换为用冒号分隔的含义（双点）_R

R：如何将用冒号（双点）分隔的值转换为对应的含义

R 如何将值转换为用冒号分隔的含义（双点）,r,R,我有这样的数据 df<- structure(list(df = structure(c(10L, 8L, 2L, 8L, 7L, 7L, 10L, 8L, 3L, 10L, 10L, 9L, 9L, 1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L, 1L, 1L, 2L, 6L), .Label = c("-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01", "1:1(2):1", "1(1)|1(2):1(1)|1(2):1(

我有这样的数据

# Sample data: a data frame with a single factor column `df` holding 26
# colon-separated codes drawn from 10 distinct levels.
df <- data.frame(
  df = factor(
    c(10L, 8L, 2L, 8L, 7L, 7L, 10L, 8L, 3L, 10L, 10L, 9L, 9L,
      1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L, 1L, 1L, 2L, 6L),
    levels = 1:10,
    labels = c(
      "-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01", "1:1(2):1",
      "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
      "2:-1:-1", "2:-1:2", "2:02:02"
    )
  )
)

因此，预期输出如下所示

2:02:02 Homo    Homo    Homo
2:-1:-1 Homo    No  No
-1:2:-1 No  Homo    No
2:-1:-1 Homo    No  No
1(1)|1(2):2:2   Het1 Het2   Homo    Homo
1(1)|1(2):2:2   Het1 Het2   Homo    Homo
2:02:02 Homo    Homo    Homo
2:-1:-1 Homo    No  No
-1:2:2  No  Homo    Homo
2:02:02 Homo    Homo    Homo
2:02:02 Homo    Homo    Homo
2:-1:2  Homo    No  Homo
2:-1:2  Homo    No  Homo
-1:-1:2 No  No  Homo
-1:-1:2 No  No  Homo
-1:2:2  No  Homo    Homo
-1:-1:2 No  No  Homo
1:1(2):1    Het Het2    Het
1:1(2):1    Het Het3    Het
1:01:01 Het Het Het
2:02:02 Homo    Homo    Homo
2:-1:-1 Homo    No  No
-1:-1:2 No  No  Homo
-1:-1:2 No  No  Homo
-1:2:-1 No  Homo    No
1(1)|1(2):1(1)|1(2):1(1)|1(2)   Het1 Het2   Het1 Het2   Het1 Het2

不确定结果是否正是您所需要的，但这可能会有所帮助。我认为这也许不是最有效、最漂亮的解决方案，但它可以作为一个起点

另外，我把您的数据命名为

dats

，如下所示：

head(dats)
                              df
1                        2:02:02
2                        2:-1:-1
3                        -1:2:-1
4                        2:-1:-1
5                  1(1)|1(2):2:2
6                  1(1)|1(2):2:2

我创建了一个映射

data.frame

：

mapping
    id value
1    2  Homo
2   -1    No
3    1   Het
4 1(1)  Het1
5 1(2)  Het2

首先，我使用

stringr::str_split_fixed()

按冒号进行拆分：

library(stringr)
# Cut every "a:b:c" string at the colons into exactly three pieces and store
# the resulting pieces as the columns of a data frame.
double_point <- as.data.frame.matrix(
  str_split_fixed(string = dats$df, pattern = ":", n = 3)
)

现在，我们必须用映射替换这些值，并用拆分的原始数据绑定它们（在本例中）：

您可以在

num2words

数据框中显式定义所有可能的值，然后运行以下操作

# Example input: a one-column data frame (26 rows) whose factor column `df`
# holds colon-separated codes such as "2:02:02" or "1(1)|1(2):2:2".
df <- structure(
  list(df = structure(
    c(10L, 8L, 2L, 8L, 7L, 7L, 10L, 8L, 3L, 10L, 10L, 9L, 9L,
      1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L, 1L, 1L, 2L, 6L),
    .Label = c("-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01", "1:1(2):1",
               "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
               "2:-1:-1", "2:-1:2", "2:02:02"),
    class = "factor"
  )),
  class = "data.frame",
  row.names = c(NA, -26L)
)

# Lookup table: every code that may appear between two colons, with its label.
# colClasses = "character" forces `num` to be read as text, so "02" and "2"
# stay distinct keys even if the non-numeric rows are ever removed.
num2words <- read.table(text = "
                        num word
                        2 Homo
                        02  Homo
                        -1  No
                        1 Het
                        01  Het                        
                        1(1)  Het1
                        1(2)  Het2
                        1(1)|1(2) Het1-Het2
                        1(2)|1(1) Het2-Het1
                        ", header = TRUE, colClasses = "character",
                        stringsAsFactors = FALSE)

# Split all rows on the literal ":" in one vectorised call (safe for a
# zero-row `df`, unlike 1:nrow(df)), then translate each piece through the
# lookup table. Pieces that are missing from `num2words` become NA.
lst <- lapply(
  strsplit(as.character(df$df), ":", fixed = TRUE),
  function(split.nums) num2words$word[match(split.nums, num2words$num)]
)

# Stack the per-row label vectors into a matrix and bind it to the input.
new.df <- cbind(df, do.call(rbind, lst))

> new.df

                              df         1         2         3
1                        2:02:02      Homo      Homo      Homo
2                        2:-1:-1      Homo        No        No
3                        -1:2:-1        No      Homo        No
4                        2:-1:-1      Homo        No        No
5                  1(1)|1(2):2:2 Het1-Het2      Homo      Homo
6                  1(1)|1(2):2:2 Het1-Het2      Homo      Homo
7                        2:02:02      Homo      Homo      Homo
8                        2:-1:-1      Homo        No        No
9                         -1:2:2        No      Homo      Homo
10                       2:02:02      Homo      Homo      Homo
11                       2:02:02      Homo      Homo      Homo
12                        2:-1:2      Homo        No      Homo
13                        2:-1:2      Homo        No      Homo
14                       -1:-1:2        No        No      Homo
15                       -1:-1:2        No        No      Homo
16                        -1:2:2        No      Homo      Homo
17                       -1:-1:2        No        No      Homo
18                      1:1(2):1       Het      Het2       Het
19                      1:1(2):1       Het      Het2       Het
20                       1:01:01       Het       Het       Het
21                       2:02:02      Homo      Homo      Homo
22                       2:-1:-1      Homo        No        No
23                       -1:-1:2        No        No      Homo
24                       -1:-1:2        No        No      Homo
25                       -1:2:-1        No      Homo        No
26 1(1)|1(2):1(1)|1(2):1(1)|1(2) Het1-Het2 Het1-Het2 Het1-Het2

“double points”（双点）是指冒号吗？这是某个地区的说法吗？我在美国从来没听过这种叫法。@camille 在葡萄牙语中，冒号叫“dois pontos”，字面意思是“两点”。@Camille 我的意思是：

02 和 2

会映射到同一个字符串吗？@akrun 是的，02 和 2 对应同一个字符串。您的代码没有输出 1(1) 或 1(2) 的情况。你能给我看一下 mapping 的 str() 输出吗？嗨，我已经发布了编辑。看起来在最后一个输出的最后一行中，已经输出了您提到的情况。

# For each colon-separated column, split on the literal "|" into exactly two
# pieces; the second piece is "" when the value contains no "|".
# lapply() over seq_len() builds the list in one step and stays correct when
# `double_point` has no columns (1:ncol() would iterate over c(1, 0)).
listed <- lapply(seq_len(ncol(double_point)), function(i) {
  str_split_fixed(double_point[, i], "\\|", 2)
})

# Bind the pieces side by side into one character matrix
# (two columns per original column).
df_ <- do.call(cbind, listed)

# Keep an untouched copy: df_ is modified further down, while df_1 keeps the
# original pieces for the final output.
df_1 <- df_

# result till now:
head(df_1)
     [,1]   [,2]   [,3] [,4] [,5] [,6]
[1,] "2"    ""     "02" ""   "02" ""  
[2,] "2"    ""     "-1" ""   "-1" ""  
[3,] "-1"   ""     "2"  ""   "-1" ""  
[4,] "2"    ""     "-1" ""   "-1" ""  
[5,] "1(1)" "1(2)" "2"  ""   "2"  ""  
[6,] "1(1)" "1(2)" "2"  ""   "2"  ""

# Normalise the codes so that e.g. "02" matches the mapping id "2".
# Only leading zeros are removed (a lone "0" is kept): deleting every "0"
# would also turn an id such as "10" into "1".
df_[] <- sub("^0+(?!$)", "", df_, perl = TRUE)

# Look up the label of every code, one column at a time. Each element is a
# one-column data frame (`value`); codes with no entry in `mapping`,
# including the empty second halves, yield NA.
listed <- lapply(seq_len(ncol(df_)), function(i) {
  mapping[match(df_[, i], mapping$id), 2, drop = FALSE]
})

# Original pieces on the left, their labels on the right.
df_final <- cbind(df_1, do.call(cbind, listed))
head(df_final)
       1    2  3 4  5 6 value value value value value value
1      2      02   02    Homo  <NA>  Homo  <NA>  Homo  <NA>
1.1    2      -1   -1    Homo  <NA>    No  <NA>    No  <NA>
2     -1       2   -1      No  <NA>  Homo  <NA>    No  <NA>
1.2    2      -1   -1    Homo  <NA>    No  <NA>    No  <NA>
4   1(1) 1(2)  2    2    Het1  Het2  Homo  <NA>  Homo  <NA>
4.1 1(1) 1(2)  2    2    Het1  Het2  Homo  <NA>  Homo  <NA>

dput(mapping)
structure(list(id = structure(c(5L, 1L, 2L, 3L, 4L), .Label = c("-1", 
"1", "1(1)", "1(2)", "2"), class = "factor"), value = structure(c(4L, 
5L, 1L, 2L, 3L), .Label = c("Het", "Het1", "Het2", "Homo", "No"
), class = "factor")), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5"))

str(mapping)
'data.frame':   5 obs. of  2 variables:
 $ id   : Factor w/ 5 levels "-1","1","1(1)",..: 5 1 2 3 4
 $ value: Factor w/ 5 levels "Het","Het1","Het2",..: 4 5 1 2 3

# Example input: a one-column data frame (26 rows) whose factor column `df`
# holds colon-separated codes such as "2:02:02" or "1(1)|1(2):2:2".
df <- structure(
  list(df = structure(
    c(10L, 8L, 2L, 8L, 7L, 7L, 10L, 8L, 3L, 10L, 10L, 9L, 9L,
      1L, 1L, 3L, 1L, 5L, 5L, 4L, 10L, 8L, 1L, 1L, 2L, 6L),
    .Label = c("-1:-1:2", "-1:2:-1", "-1:2:2", "1:01:01", "1:1(2):1",
               "1(1)|1(2):1(1)|1(2):1(1)|1(2)", "1(1)|1(2):2:2",
               "2:-1:-1", "2:-1:2", "2:02:02"),
    class = "factor"
  )),
  class = "data.frame",
  row.names = c(NA, -26L)
)

# Lookup table: every code that may appear between two colons, with its label.
# colClasses = "character" forces `num` to be read as text, so "02" and "2"
# stay distinct keys even if the non-numeric rows are ever removed.
num2words <- read.table(text = "
                        num word
                        2 Homo
                        02  Homo
                        -1  No
                        1 Het
                        01  Het                        
                        1(1)  Het1
                        1(2)  Het2
                        1(1)|1(2) Het1-Het2
                        1(2)|1(1) Het2-Het1
                        ", header = TRUE, colClasses = "character",
                        stringsAsFactors = FALSE)

# Split all rows on the literal ":" in one vectorised call (safe for a
# zero-row `df`, unlike 1:nrow(df)), then translate each piece through the
# lookup table. Pieces that are missing from `num2words` become NA.
lst <- lapply(
  strsplit(as.character(df$df), ":", fixed = TRUE),
  function(split.nums) num2words$word[match(split.nums, num2words$num)]
)

# Stack the per-row label vectors into a matrix and bind it to the input.
new.df <- cbind(df, do.call(rbind, lst))

> new.df

                              df         1         2         3
1                        2:02:02      Homo      Homo      Homo
2                        2:-1:-1      Homo        No        No
3                        -1:2:-1        No      Homo        No
4                        2:-1:-1      Homo        No        No
5                  1(1)|1(2):2:2 Het1-Het2      Homo      Homo
6                  1(1)|1(2):2:2 Het1-Het2      Homo      Homo
7                        2:02:02      Homo      Homo      Homo
8                        2:-1:-1      Homo        No        No
9                         -1:2:2        No      Homo      Homo
10                       2:02:02      Homo      Homo      Homo
11                       2:02:02      Homo      Homo      Homo
12                        2:-1:2      Homo        No      Homo
13                        2:-1:2      Homo        No      Homo
14                       -1:-1:2        No        No      Homo
15                       -1:-1:2        No        No      Homo
16                        -1:2:2        No      Homo      Homo
17                       -1:-1:2        No        No      Homo
18                      1:1(2):1       Het      Het2       Het
19                      1:1(2):1       Het      Het2       Het
20                       1:01:01       Het       Het       Het
21                       2:02:02      Homo      Homo      Homo
22                       2:-1:-1      Homo        No        No
23                       -1:-1:2        No        No      Homo
24                       -1:-1:2        No        No      Homo
25                       -1:2:-1        No      Homo        No
26 1(1)|1(2):1(1)|1(2):1(1)|1(2) Het1-Het2 Het1-Het2 Het1-Het2