如何计算R中一个项目与另一个项目组合在一起的次数？_R_Dplyr

如何计算R中一个项目与另一个项目组合在一起的次数？

如何计算R中一个项目与另一个项目组合在一起的次数？,r,dplyr,R,Dplyr,我有一张桌子（示例）：我正在尝试将其转换为： Country 1 | Country 2 | Count ------------------------------- SE DE 2 SE FI 2 FI DE 1 我试过使用dplyr的计数、分组、总结，但我似乎无法理解它。相反，我得到了一个表格，每个国家作为列，每个组作为行，如果国家是否在组中，单元格中有一个1

我有一张数据表（示例）：

我正在尝试将其转换为：

 Country 1 | Country 2 | Count
-------------------------------
    SE          DE         2
    SE          FI         2
    FI          DE         1

我试过使用 dplyr 的 count、group_by、summarise，但始终没有弄明白。结果我得到的是这样一张表：每个国家为一列，每个组为一行，单元格中用 1 或 0 表示该国家是否在该组中。

我们可以使用 base R 的方法：先用 `table` 获取频数，再用 `crossprod` 计算交叉乘积，把对角线和下三角元素设为 `NA`，转换为 data.frame 之后再删除含 `NA` 的行。
# Cross-tabulate the two columns of df1, then take t(tab) %*% tab so that each
# cell holds the co-occurrence count for a pair of column-2 levels.
m1 <- crossprod(table(df1))
# Keep only the strict upper triangle: blank out the diagonal (self-pairs) and
# the lower triangle (mirror images of pairs already counted).
is.na(m1) <- !upper.tri(m1)
# Long format, one row per remaining pair; the blanked cells are dropped.
subset(as.data.frame.table(m1), !is.na(Freq))
#    Country Country.1 Freq
#4      DE        FI    1
#7      DE        SE    2
#8      FI        SE    2

这里有一种使用 `combn` 的方法：

library(tidyverse)
# Build the "A_B" key for every unordered pair of distinct values in `x`.
# Values are de-duplicated and sorted first, so each pair has one canonical
# spelling and a value repeated inside a group cannot pair with itself.
# Returns character(0) when fewer than two distinct values are present,
# because combn() raises an error when asked for pairs from a shorter vector.
pair_keys <- function(x, sep = "_") {
  x <- sort(unique(x))
  if (length(x) < 2L) {
    return(character(0))
  }
  # combn() applies FUN to each combination itself; no apply() wrapper needed.
  combn(x, 2L, FUN = paste, collapse = sep)
}

# For each pair of countries, count the number of groups containing both.
# NOTE: the "_" separator assumes Country values never contain an underscore.
df %>%
  group_by(Group) %>%
  summarise(cmbn = list(pair_keys(Country))) %>%
  # `cols` is named explicitly; calling unnest() without it is deprecated.
  # Groups that produced no pairs (empty vectors) are dropped here.
  unnest(cols = cmbn) %>%
  select(-Group) %>%
  separate(cmbn, into = c("Country 1", "Country 2"), sep = "_") %>%
  count(`Country 1`, `Country 2`)
## A tibble: 3 x 3
#  `Country 1` `Country 2`     n
#  <chr>       <chr>       <int>
#1 DE          FI              1
#2 DE          SE              2
#3 FI          SE              2

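library(tidyverse)
df %>%
    group_by(Group) %>%
    summarise(cmbn = list(apply(combn(Country, 2), 2, function(x)
        paste(sort(x), collapse = "_")))) %>%
    unnest() %>%
    select(-Group) %>%
    separate(cmbn, into = c("Country 1", "Country 2"), sep = "_") %>%
    count(`Country 1`, `Country 2`)
## A tibble: 3 x 3
#  `Country 1` `Country 2`     n
#  <chr>       <chr>       <int>
#1 DE          FI              1
#2 DE          SE              2
#3 FI          SE              2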


样本数据
另一种 dplyr 方法：定义一个函数，并将其应用于 Country 取值的每一种两两组合。
# Sample data: one row per (Group, Country) membership.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned; both columns are read as character, not factor.
df <- read.table(text = "
Group Country
Group1      SE
Group1      DE  
Group2      SE   
Group2      DE
Group2      FI
Group3      SE
Group3      FI
", header = TRUE, stringsAsFactors = FALSE)

library(dplyr)

# Given two Country values, return how many groups contain both of them.
# Reads the data frame `df` from the enclosing (global) environment.
f <- function(x, y) {
  # Rows belonging to either country, with exact duplicate rows collapsed so a
  # repeated (Group, Country) row cannot be counted twice.
  pair_rows <- distinct(filter(df, Country %in% c(x, y)))
  # Number of remaining rows per group.
  rows_per_group <- count(pair_rows, Group)
  # A group with more than one row has both countries present.
  nrow(filter(rows_per_group, n > 1))
}

# Vectorise f so it can be called on whole columns: the wrapper calls the
# scalar f once per pair of elements of its arguments.
f <- Vectorize(f)

# Enumerate every unordered pair of distinct Country values (one pair per row,
# in columns X1 and X2) and apply f to each pair.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
data.frame(t(combn(unique(df$Country), 2)), stringsAsFactors = FALSE) %>%
  mutate(NumGroups = f(X1, X2))

#   X1 X2 NumGroups
# 1 SE DE         2
# 2 SE FI         2
# 3 DE FI         1

您不需要使用 `apply`，因为 `combn` 本身带有 `FUN` 参数，即：`df %>% group_by(Group) %>% summarise(cmbn = list(combn(Country, 2, FUN = function(x) paste(sort(x), collapse = "_"))))`。
library(tidyverse)
# Build the "A_B" key for every unordered pair of distinct values in `x`.
# Values are de-duplicated and sorted first, so each pair has one canonical
# spelling and a value repeated inside a group cannot pair with itself.
# Returns character(0) when fewer than two distinct values are present,
# because combn() raises an error when asked for pairs from a shorter vector.
pair_keys <- function(x, sep = "_") {
  x <- sort(unique(x))
  if (length(x) < 2L) {
    return(character(0))
  }
  # combn() applies FUN to each combination itself; no apply() wrapper needed.
  combn(x, 2L, FUN = paste, collapse = sep)
}

# For each pair of countries, count the number of groups containing both.
# NOTE: the "_" separator assumes Country values never contain an underscore.
df %>%
  group_by(Group) %>%
  summarise(cmbn = list(pair_keys(Country))) %>%
  # `cols` is named explicitly; calling unnest() without it is deprecated.
  # Groups that produced no pairs (empty vectors) are dropped here.
  unnest(cols = cmbn) %>%
  select(-Group) %>%
  separate(cmbn, into = c("Country 1", "Country 2"), sep = "_") %>%
  count(`Country 1`, `Country 2`)
## A tibble: 3 x 3
#  `Country 1` `Country 2`     n
#  <chr>       <chr>       <int>
#1 DE          FI              1
#2 DE          SE              2
#3 FI          SE              2

# Sample data: one row per (Group, Country) membership.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned; both columns are read as character, not factor.
df <- read.table(text =
    "Group    Country
 Group1      SE
 Group1      DE
 Group2      SE
 Group2      DE
 Group2      FI
 Group3      SE
 Group3      FI", header = TRUE, stringsAsFactors = FALSE)

# Sample data: each row records that a Country belongs to a Group.
# `<-` is used for assignment, and TRUE/FALSE replace T/F, which are plain
# variables that can be overwritten; columns stay character, not factor.
df <- read.table(text = "
Group Country
Group1      SE
Group1      DE  
Group2      SE   
Group2      DE
Group2      FI
Group3      SE
Group3      FI
", header = TRUE, stringsAsFactors = FALSE)

library(dplyr)

# Given two Country values, return how many groups contain both of them.
# Reads the data frame `df` from the enclosing (global) environment.
f <- function(x, y) {
  # Rows belonging to either country, with exact duplicate rows collapsed so a
  # repeated (Group, Country) row cannot be counted twice.
  pair_rows <- distinct(filter(df, Country %in% c(x, y)))
  # Number of remaining rows per group.
  rows_per_group <- count(pair_rows, Group)
  # A group with more than one row has both countries present.
  nrow(filter(rows_per_group, n > 1))
}

# Vectorise f so it can be called on whole columns: the wrapper calls the
# scalar f once per pair of elements of its arguments.
f <- Vectorize(f)

# Enumerate every unordered pair of distinct Country values (one pair per row,
# in columns X1 and X2) and apply f to each pair.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
data.frame(t(combn(unique(df$Country), 2)), stringsAsFactors = FALSE) %>%
  mutate(NumGroups = f(X1, X2))

#   X1 X2 NumGroups
# 1 SE DE         2
# 2 SE FI         2
# 3 DE FI         1