在R中根据分隔符、基于两列将行拆分为多行

基于delim的分隔行基于r中的两列,r,separator,R,Separator,我有以下建议: df_1=data.frame(col_1=c("a;b;c","c;d","e","f","g","h;j"),col_2=c("1;2;3","4","5;6","7","8;9","10;11;12")) 所以我想把col_1分离成单独

我有以下数据框(df):

# Example input: every cell holds one or more ";"-separated values.
df_1 <- data.frame(
  col_1 = c("a;b;c", "c;d", "e", "f", "g", "h;j"),
  col_2 = c("1;2;3", "4", "5;6", "7", "8;9", "10;11;12")
)
所以我想把col_1分离成单独的行,如果col_2存在的话,对应col_2的值

例如,如果col_1中的元素数量等于col_2中的元素数量,则应将两列的元素按位置一一对应地拆分成多行(如第1行)

如果两列的元素数量不同,但其中一列只有一个元素,则仍可拆分成多行,并把这个单一元素重复到每一行(如第2行)

如果两列的元素数量都大于1且互不相等,则该行应保持原样

以下是期望得到的最终数据集:

# Expected result: splittable rows are expanded one value per row, while the
# last row (2 vs. 3 elements) is left as-is.
df_2 <- data.frame(
  col_1 = c("a", "b", "c", "c", "d", "e", "e", "f", "g", "g", "h;j"),
  col_2 = c("1", "2", "3", "4", "4", "5", "6", "7", "8", "9", "10;11;12")
)
我们可以使用cSplit

我们可以使用cSplit


这里是通过定义自定义函数f的另一个基本R选项


这里是通过定义自定义函数f的另一个基本R选项


非常感谢。除了最后一排,我想保持-is@Mel这令人困惑。Hj和10;11:12我还以为是h 1和j 2,但我不知道这一次的拆分,所以最好还是保留下来,谢谢!除了最后一排,我想保持-is@Mel这令人困惑。Hj和10;11:12我以为是h 1和j 2,但我不知道这一次的拆分,所以最好保持原样
library(splitstackshape)
library(zoo)

# Count the ";"-separated elements in each cell. Counting characters with
# nchar() would misjudge multi-character values such as "10" (e.g. "a;b" vs
# "10;11" would look like 2 vs 4), so split first and count the pieces.
cnt1 <- lengths(strsplit(as.character(df_1$col_1), ";", fixed = TRUE))
cnt2 <- lengths(strsplit(as.character(df_1$col_2), ";", fixed = TRUE))
# Rows where both cells hold several elements but in different numbers cannot
# be paired up, so they are set aside and appended unchanged at the end.
i1 <- cnt1 != cnt2 & cnt1 > 1 & cnt2 > 1
# Split the remaining rows into long format, keep rows where at least one
# column is non-NA, and fill the shorter side with na.locf0().
rbind(
  cSplit(df_1[!i1, ], c("col_1", "col_2"), sep = ";", direction = "long")[
    !is.na(col_1) | !is.na(col_2), lapply(.SD, na.locf0)
  ],
  df_1[i1, ]
)
#     col_1    col_2
# 1:     a        1
# 2:     b        2
# 3:     c        3
# 4:     c        4
# 5:     d        4
# 6:     e        5
# 7:     e        6
# 8:     f        7
# 9:     g        8
#10:     g        9
#11:   h;j 10;11;12
# Count the ";"-separated elements in each cell. Counting characters with
# nchar() would misjudge multi-character values such as "10" (e.g. "a;b" vs
# "10;11" would look like 2 vs 4), so split first and count the pieces.
cnt1 <- lengths(strsplit(as.character(df_1$col_1), ";", fixed = TRUE))
cnt2 <- lengths(strsplit(as.character(df_1$col_2), ";", fixed = TRUE))
# Rows where both cells hold several elements but in different numbers cannot
# be paired up, so they are set aside and appended unchanged at the end.
i1 <- cnt1 != cnt2 & cnt1 > 1 & cnt2 > 1

# Split every cell of the remaining rows into its pieces (one list per column).
lst1 <- lapply(df_1[!i1, ], function(x) strsplit(x, ";", fixed = TRUE))
out <- rbind(
  do.call(rbind, Map(function(x, y) {
    l1 <- length(x)
    l2 <- length(y)
    mx <- max(l1, l2)
    # When only one side has a single element, repeat it to match the other.
    x <- if (l1 != l2 && l1 == 1) rep(x, mx) else x
    y <- if (l1 != l2 && l2 == 1) rep(y, mx) else y
    data.frame(col_1 = x, col_2 = y)
  }, lst1[[1]], lst1[[2]])),
  df_1[i1, ]
)

# Reset row names so the result is numbered consecutively.
row.names(out) <- NULL
out
#   col_1    col_2
#1      a        1
#2      b        2
#3      c        3
#4      c        4
#5      d        4
#6      e        5
#7      e        6
#8      f        7
#9      g        8
#10     g        9
#11   h;j 10;11;12
# Expand one (col_1, col_2) pair into a data frame of ";"-separated pieces.
#
# v: a two-element vector or list; v[[1]] is the col_1 cell, v[[2]] the col_2
#    cell.
# Returns a data frame with columns col_1 and col_2. When both cells have the
# same number of pieces, or one of them has exactly one piece, the result has
# one row per piece (a single piece is recycled by data.frame()). Otherwise
# the original, unsplit values are returned as a single row.
f <- function(v) {
  parts_1 <- unlist(strsplit(v[[1]], ";"))
  parts_2 <- unlist(strsplit(v[[2]], ";"))
  n_1 <- length(parts_1)
  n_2 <- length(parts_2)

  # Different counts and neither side is a single piece: the pieces cannot be
  # paired, so hand back the raw cells untouched.
  if (n_1 != n_2 && min(n_1, n_2) != 1) {
    return(data.frame(col_1 = v[[1]], col_2 = v[[2]]))
  }

  data.frame(col_1 = parts_1, col_2 = parts_2)
}

# Run f over every row of df_1 (MARGIN = 1) and stack the per-row data frames
# into a single result.
df_2 <- do.call(what = rbind, args = apply(X = df_1, MARGIN = 1, FUN = f))
#   col_1    col_2
#1      a        1
#2      b        2
#3      c        3
#4      c        4
#5      d        4
#6      e        5
#7      e        6
#8      f        7
#9      g        8
#10     g        9
#11   h;j 10;11;12