通过聚合R中的复杂成对名称求和

通过聚合R中的复杂成对名称求和,r,dplyr,plyr,R,Dplyr,Plyr,在R中,我试图基于唯一ID聚合数据帧,但我需要为ID使用某种通配符值。意思是我有这样成对的名字: lion_tiger elephant_lion tiger_lion 我需要将lion_tiger和tiger_lion ID加在一起,因为成对的顺序并不重要 以此数据帧为例: df <- data.frame(pair = c("1_3","2_4","2_2","1_2","2_1","4_2","3_1","4_3","3_2"), value = c("

在R中,我试图基于唯一ID聚合数据帧,但我需要为ID使用某种通配符值。意思是我有这样成对的名字:

lion_tiger
elephant_lion
tiger_lion
我需要将lion_tiger和tiger_lion ID加在一起,因为成对的顺序并不重要

以此数据帧为例:

# Example data: one row per pair ID of the form "a_b", with its value stored
# as text. Pairs such as "1_3" and "3_1" denote the same unordered pair.
df <- data.frame(
  pair = c("1_3", "2_4", "2_2", "1_2", "2_1", "4_2", "3_1", "4_3", "3_2"),
  value = c("12", "10", "19", "2", "34", "29", "13", "3", "14")
)

有什么建议吗?虽然我的例子中用数字作为配对 ID,但实际上我需要它能处理文本 ID(如上面的 lion_tiger 例子)。

我们可以将 `pair` 列按 `_` 拆分,然后对各部分 `sort` 排序,再用 `paste` 粘贴回去,并把结果作为分组键传给分组函数来求 `sum`:

# Sum `value` over unordered pairs: "1_3" and "3_1" (or "lion_tiger" and
# "tiger_lion") are mapped to the same key before aggregating.
pair_key <- vapply(
  strsplit(as.character(df$pair), "_", fixed = TRUE),
  function(parts) {
    # Sort numerically only when every part parses as a number; otherwise
    # sort as text. Calling as.numeric() on names yields NA, which sort()
    # drops, so every text pair would collapse into the same empty key.
    as_num <- suppressWarnings(as.numeric(parts))
    ordered <- if (anyNA(as_num)) sort(parts) else sort(as_num)
    paste(ordered, collapse = "_")
  },
  character(1)
)
# as.character() first so that a factor column yields its labels, not its
# internal integer codes.
tapply(as.numeric(as.character(df$value)), pair_key, FUN = sum)

或者另一个选项是
gsubfn

library(gsubfn)

# Rewrite every "a_b" pair so that its two halves appear in a canonical order,
# which makes "a_b" and "b_a" the same grouping key. The pattern accepts any
# run of non-underscore characters, so it works for text IDs ("lion_tiger")
# as well as numeric ones ("1_3").
df$pair <- gsubfn(
  "([^_]+)_([^_]+)",
  function(x, y) {
    halves <- c(x, y)
    # Numeric order when both halves are numbers, text order otherwise;
    # as.numeric() on names would produce NA and lose the ID.
    as_num <- suppressWarnings(as.numeric(halves))
    ordered <- if (anyNA(as_num)) sort(halves) else sort(as_num)
    paste(ordered, collapse = "_")
  },
  as.character(df$pair)
)
# as.character() first so that a factor column yields its labels, not its
# internal integer codes.
df$value <- as.numeric(as.character(df$value))
aggregate(value ~ pair, df, sum)
library(gsubfn)

另一种做法是使用 tidyverse 和 purrrlyr:

# Example data with text pair IDs; "lion_tiger" and "tiger_lion" are the same
# unordered pair.
df <- data.frame(
  name = c("lion_tiger", "elephant_lion", "tiger_lion"),
  value = c(1, 2, 3),
  stringsAsFactors = FALSE
)

# library() stops immediately if a package is missing; require() would only
# warn and let the pipeline fail later with "could not find function".
library(tidyverse)
library(purrrlyr)

# Split each ID into its two halves, rebuild it with the halves in sorted
# order, then sum `value` per rebuilt ID.
df %>%
  separate(col = name, into = c("A", "B"), sep = "_") %>%
  by_row(
    ..f = function(this_row) {
      paste0(sort(c(this_row$A, this_row$B)), collapse = "_")
    },
    .collate = "rows"
  ) %>%
  rename(sorted = ".out") %>%
  group_by(sorted) %>%
  summarize(sum(value)) %>%
  show()
## A tibble: 2 x 2
#  sorted `sum(value)`
#  <chr>        <dbl>
#1 elephant_lion   2
#2 lion_tiger      4
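df %>% separate(col = name, sep = "_", c("A", "B")) %>% 
        by_row(.collate = "rows", 
            ..f = function(this_row) {
                  paste0(sort(c(this_row$A, this_row$B)), collapse = "_")
            }) %>% 
        rename(sorted = ".out") %>%
        group_by(sorted) %>%
        summarize(sum(value))%>%show
## A tibble: 2 x 2
#  sorted `sum(value)`
#  <chr>        <dbl>
#1 elephant_lion   2
#2 lion_tiger      4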

如何让 gsubfn 处理完整的文本 ID(比如 lion_tiger 的例子)?@shu251 那样的话,你需要把模式从
([0-9]+)
更改为
([a-z]+)
我将show替换为'as.data.frame',并将其写入新的dataframe。谢谢!
# Example data with text pair IDs; "lion_tiger" and "tiger_lion" are the same
# unordered pair.
df <- data.frame(
  name = c("lion_tiger", "elephant_lion", "tiger_lion"),
  value = c(1, 2, 3),
  stringsAsFactors = FALSE
)

# library() stops immediately if a package is missing; require() would only
# warn and let the pipeline fail later with "could not find function".
library(tidyverse)
library(purrrlyr)

# Split each ID into its two halves, rebuild it with the halves in sorted
# order, then sum `value` per rebuilt ID.
df %>%
  separate(col = name, into = c("A", "B"), sep = "_") %>%
  by_row(
    ..f = function(this_row) {
      paste0(sort(c(this_row$A, this_row$B)), collapse = "_")
    },
    .collate = "rows"
  ) %>%
  rename(sorted = ".out") %>%
  group_by(sorted) %>%
  summarize(sum(value)) %>%
  show()
## A tibble: 2 x 2
#  sorted `sum(value)`
#  <chr>        <dbl>
#1 elephant_lion   2
#2 lion_tiger      4