从数据帧列'C'分配/连接值,对应于同一对值'A`&`B`到第二个数据帧。R-dplyr
我有两个数据帧从数据帧列'C'分配/连接值,对应于同一对值'A`&`B`到第二个数据帧。R-dplyr,r,dataframe,dplyr,R,Dataframe,Dplyr,我有两个数据帧 第一个(df1)是由字符串names1和names2及其频率组成的频率数据帧 第二个(df2)包含两列names1和names2,其中包含一个或多个对,或者不包含这些对。有时会有不同的顺序 我想在第一个dafaramedf1的新列中指定频率 df1 <- tibble(names1 = c('architecture', 'assessment', 'build'), names2 = c('build', 'data', 'data'),
- 第一个(
)是由字符串df1
和names1
及其names2
频率组成的频率数据帧李>
- 第二个(
)包含两列df2
和names1
,其中包含一个或多个对,或者不包含这些对。有时会有不同的顺序names2
- 第二个(
df1
的新列中指定频率
df1 <- tibble(names1 = c('architecture', 'assessment', 'build'),
names2 = c('build', 'data', 'data'),
frequency = c(36,13,720))
# A tibble: 3 x 3
names1 names2 frequency
<chr> <chr> <dbl>
1 architecture build 36
2 assessment data 13
3 build data 720
对于此结果:
names1 names2 frequency
<chr> <chr> <dbl>
1 architecture build 36
2 build architecture 36
3 assessment data 13
4 assessment data 13
5 business strategy 0
注意:我想保留不匹配的行
5 business strategy 0
这里的问题是,名称列的顺序对于联接很重要,所以必须更新数据集并应用一致的顺序 下面是一个
dplyr
解决方案:
library(dplyr)
df1 <- tibble(names1 = c('architecture', 'assessment', 'build'),
names2 = c('build', 'data', 'data'),
frequency = c(36,13,720))
df2 <- tibble(names1 = c('architecture', 'build', 'assessment','assessment', 'business'),
names2 = c('build','architecture', 'data', 'data', 'strategy'))
# update df1
df1 = df1 %>%
rowwise() %>%
mutate(names = paste0(sort(c(names1, names2)), collapse = "_")) %>%
select(names, frequency)
# update df2
df2 = df2 %>%
rowwise() %>%
mutate(names = paste0(sort(c(names1, names2)), collapse = "_"))
# join datasets and update columns
left_join(df2, df1, by="names") %>%
mutate(frequency = coalesce(frequency, 0)) %>%
select(-names) %>%
ungroup()
# names1 names2 frequency
# <chr> <chr> <dbl>
# 1 architecture build 36
# 2 build architecture 36
# 3 assessment data 13
# 4 assessment data 13
# 5 business strategy 0
require(dplyr
require(tidyr)
left_join(df2,df1,by=c("names1","names2")) %>%
left_join(df1,by=c(names1="names2",names2="names1")) %>%
mutate(frequency=coalesce(frequency.x,frequency.y,0)) %>%
select(-frequency.x,-frequency.y)
库(dplyr)
df1%
mutate(names=paste0(sort(c(names1,names2)),collapse=“\u”))%>%
选择(名称、频率)
#更新df2
df2=df2%>%
行()
mutate(names=paste0(sort(c(names1,names2)),collapse=“\u1”))
#连接数据集并更新列
左联合(df2,df1,by=“name”)%>%
突变(频率=结合(频率,0))%>%
选择(-names)%%>%
解组()
#名称1名称2频率
#
#1建筑建造36
#2构建架构36
#3评估数据13
#4评估数据13
#5商业策略0
这里的问题是,为了加入,名称列的顺序很重要,所以您必须更新数据集并应用一致的顺序
下面是一个dplyr
解决方案:
library(dplyr)
df1 <- tibble(names1 = c('architecture', 'assessment', 'build'),
names2 = c('build', 'data', 'data'),
frequency = c(36,13,720))
df2 <- tibble(names1 = c('architecture', 'build', 'assessment','assessment', 'business'),
names2 = c('build','architecture', 'data', 'data', 'strategy'))
# update df1
df1 = df1 %>%
rowwise() %>%
mutate(names = paste0(sort(c(names1, names2)), collapse = "_")) %>%
select(names, frequency)
# update df2
df2 = df2 %>%
rowwise() %>%
mutate(names = paste0(sort(c(names1, names2)), collapse = "_"))
# join datasets and update columns
left_join(df2, df1, by="names") %>%
mutate(frequency = coalesce(frequency, 0)) %>%
select(-names) %>%
ungroup()
# names1 names2 frequency
# <chr> <chr> <dbl>
# 1 architecture build 36
# 2 build architecture 36
# 3 assessment data 13
# 4 assessment data 13
# 5 business strategy 0
require(dplyr
require(tidyr)
left_join(df2,df1,by=c("names1","names2")) %>%
left_join(df1,by=c(names1="names2",names2="names1")) %>%
mutate(frequency=coalesce(frequency.x,frequency.y,0)) %>%
select(-frequency.x,-frequency.y)
库(dplyr)
df1%
mutate(names=paste0(sort(c(names1,names2)),collapse=“\u”))%>%
选择(名称、频率)
#更新df2
df2=df2%>%
行()
mutate(names=paste0(sort(c(names1,names2)),collapse=“\u1”))
#连接数据集并更新列
左联合(df2,df1,by=“name”)%>%
突变(频率=结合(频率,0))%>%
选择(-names)%%>%
解组()
#名称1名称2频率
#
#1建筑建造36
#2构建架构36
#3评估数据13
#4评估数据13
#5商业策略0
一个递归的tidyr::left\u join
与一些dplyr
解决方案:
library(dplyr)
df1 <- tibble(names1 = c('architecture', 'assessment', 'build'),
names2 = c('build', 'data', 'data'),
frequency = c(36,13,720))
df2 <- tibble(names1 = c('architecture', 'build', 'assessment','assessment', 'business'),
names2 = c('build','architecture', 'data', 'data', 'strategy'))
# update df1
df1 = df1 %>%
rowwise() %>%
mutate(names = paste0(sort(c(names1, names2)), collapse = "_")) %>%
select(names, frequency)
# update df2
df2 = df2 %>%
rowwise() %>%
mutate(names = paste0(sort(c(names1, names2)), collapse = "_"))
# join datasets and update columns
left_join(df2, df1, by="names") %>%
mutate(frequency = coalesce(frequency, 0)) %>%
select(-names) %>%
ungroup()
# names1 names2 frequency
# <chr> <chr> <dbl>
# 1 architecture build 36
# 2 build architecture 36
# 3 assessment data 13
# 4 assessment data 13
# 5 business strategy 0
require(dplyr
require(tidyr)
left_join(df2,df1,by=c("names1","names2")) %>%
left_join(df1,by=c(names1="names2",names2="names1")) %>%
mutate(frequency=coalesce(frequency.x,frequency.y,0)) %>%
select(-frequency.x,-frequency.y)
此解决方案保留df2中列的顺序。之所以有mutate和select行,是因为left_join添加了新的列,这些列需要合并回单个频率列(并用0替换NAs),然后删除
结果:
# A tibble: 5 x 3
names1 names2 frequency
<chr> <chr> <dbl>
1 architecture build 36
2 build architecture 36
3 assessment data 13
4 assessment data 13
5 business strategy 0
#一个tible:5 x 3
名称1名称2频率
1建筑建造36
2构建架构36
3评估数据13
4评估数据13
5商业策略0
一个递归的tidyr::left\u join
与一些dplyr
解决方案:
library(dplyr)
df1 <- tibble(names1 = c('architecture', 'assessment', 'build'),
names2 = c('build', 'data', 'data'),
frequency = c(36,13,720))
df2 <- tibble(names1 = c('architecture', 'build', 'assessment','assessment', 'business'),
names2 = c('build','architecture', 'data', 'data', 'strategy'))
# update df1
df1 = df1 %>%
rowwise() %>%
mutate(names = paste0(sort(c(names1, names2)), collapse = "_")) %>%
select(names, frequency)
# update df2
df2 = df2 %>%
rowwise() %>%
mutate(names = paste0(sort(c(names1, names2)), collapse = "_"))
# join datasets and update columns
left_join(df2, df1, by="names") %>%
mutate(frequency = coalesce(frequency, 0)) %>%
select(-names) %>%
ungroup()
# names1 names2 frequency
# <chr> <chr> <dbl>
# 1 architecture build 36
# 2 build architecture 36
# 3 assessment data 13
# 4 assessment data 13
# 5 business strategy 0
require(dplyr
require(tidyr)
left_join(df2,df1,by=c("names1","names2")) %>%
left_join(df1,by=c(names1="names2",names2="names1")) %>%
mutate(frequency=coalesce(frequency.x,frequency.y,0)) %>%
select(-frequency.x,-frequency.y)
此解决方案保留df2中列的顺序。之所以有mutate和select行,是因为left_join添加了新的列,这些列需要合并回单个频率列(并用0替换NAs),然后删除
结果:
# A tibble: 5 x 3
names1 names2 frequency
<chr> <chr> <dbl>
1 architecture build 36
2 build architecture 36
3 assessment data 13
4 assessment data 13
5 business strategy 0
#一个tible:5 x 3
名称1名称2频率
1建筑建造36
2构建架构36
3评估数据13
4评估数据13
5商业策略0
从dplyr
中签出left\u join
和right\u join
我的问题是有时我有df1$A==df2$B和&df1$B==df2$A
或df1$A==df2$B和&df1$B==df2$A
``1架构构建36``签出left\u join
和right\u join
fromdplyr
我的问题是有时我有df1$A==df2$B和&df1$B==df2$A
或df1$A==df2$B和&df1$B==df2$A
``1体系结构构建36 2构建体系结构36```