拆分字符串,使用拆分字符串中的元素和R中联接表中的sum变量联接另一个表
我有一个ID列表,我想在拆分字符串,使用拆分字符串中的元素和R中联接表中的sum变量联接另一个表,r,join,split,sum,R,Join,Split,Sum,我有一个ID列表,我想在left\u join()和summary()中使用它来获取另一个值的总和。我可以用一行,但不能用多行 这项工作: library(tidyverse) str_split(c("24,108"), ",") %>% as.data.frame() %>% rename_(id = names(.))%>% left_join(barriers_h, by = "id") %&
left_join()
和summarise()
中使用它来获取另一个值的总和。我可以对单行数据做到,但无法对多行数据做到
这样可以正常运行:
library(tidyverse)
# Sum `Pass` over the ids contained in one comma-separated string.
# The id column is built directly instead of going through
# as.data.frame() + rename_(): rename_() is deprecated in dplyr, and the
# round-trip only existed to give the split vector the name `id`.
# NOTE(review): the ids are character here, so this join presumably relies
# on barriers_h$id being character as well -- confirm against the real table.
tibble(id = str_split("24,108", ",")[[1]]) %>%
  left_join(barriers_h, by = "id") %>%
  summarise(sum(Pass)) %>%
  pull(1)
但下面这样不行:
# Add `pathway.pass`: for every row of `df`, the sum of `Pass` in
# `barriers_h` over the comma-separated ids stored in `barrier.id`.
#
# The original nested pipeline fed the str_split() result to
# as.data.frame(); the reported error ("arguments imply differing number of
# rows: 2, 5") shows the split pieces of several rows being bound as columns
# of one data frame. Each row's ids are handled on their own here, so rows
# may hold any number of ids. The result is a plain (not rowwise) data frame.
df %>%
  mutate(
    pathway.pass = map_dbl(
      str_split(barrier.id, ","),
      function(ids) {
        # str_split() yields NA for a missing barrier.id; propagate it.
        if (anyNA(ids)) {
          return(NA_real_)
        }
        # match() compares through a common type, so the character ids line
        # up with `id` whether that column is numeric or character. An id
        # that is absent from barriers_h gives NA, as the left join did.
        sum(barriers_h$Pass[match(ids, barriers_h$id)])
      }
    )
  )
并给出了以下错误:
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, :
arguments imply differing number of rows: 2, 5
df
is
barrier.id pathway.pass
3 24,108 0.25000
4 24,108,167,148,195 0.03125
id Pass
1 24 0.26894142
2 108 0.02931223
3 148 0.07585818
4 167 0.02931223
5 195 0.02931223
barrier_h
barrier.id pathway.pass
3 24,108 0.25000
4 24,108,167,148,195 0.03125
id Pass
1 24 0.26894142
2 108 0.02931223
3 148 0.07585818
4 167 0.02931223
5 195 0.02931223
期望的结果是
barrier.id pathway.pass
3 24,108 0.26894142
4 24,108,167,148,195 0.1637949
谢谢 以下是我的尝试:
library(data.table)
library(tidyverse)
# Rebuild the two example tables from the question as inline text.
df <- fread(text = "barrier.id pathway.pass
24,108 0.25000
24,108,167,148,195 0.03125")
barrier <- fread(text = "id Pass
24 0.26894142
108 0.02931223
148 0.07585818
167 0.02931223
195 0.02931223")

# One output row per original row of `df`: the ids are split into a numeric
# list column, expanded to one row per id, looked up in `barrier`, and then
# collapsed back per original row (tracked by `group`) while summing `Pass`.
results <- df %>%
  mutate(
    group = row_number(),
    barrier.id = lapply(strsplit(barrier.id, ","), as.numeric)
  ) %>%
  unnest(barrier.id) %>%
  left_join(barrier, by = c(barrier.id = "id")) %>%
  group_by(group) %>%
  summarise(
    barrier.id = paste(barrier.id, collapse = ","),
    pathway.pass = sum(Pass)
  )
group barrier.id pathway.pass
<int> <chr> <dbl>
1 1 24,108 0.298
2 2 24,108,167,148,195 0.433
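library(data.table)
library(tidyverse)
df <- fread(text="barrier.id pathway.pass
24,108 0.25000
24,108,167,148,195 0.03125")
barrier <- fread(text="id Pass
24 0.26894142
108 0.02931223
148 0.07585818
167 0.02931223
195 0.02931223")
results <- df %>%
mutate(barrier.id = strsplit(barrier.id,","), group = row_number()) %>%
unnest(barrier.id) %>%
mutate(barrier.id = as.numeric(barrier.id)) %>%
left_join(barrier, by = c("barrier.id"="id")) %>%
group_by(group) %>%
summarize(barrier.id = paste0(barrier.id,collapse=","), pathway.pass = sum(Pass))
group barrier.id pathway.pass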
1 1 24,108 0.298
2 2 24,108,167,148,195 0.433
以下是我的尝试:
library(data.table)
library(tidyverse)
# Rebuild the two example tables from the question as inline text.
df <- fread(text = "barrier.id pathway.pass
24,108 0.25000
24,108,167,148,195 0.03125")
barrier <- fread(text = "id Pass
24 0.26894142
108 0.02931223
148 0.07585818
167 0.02931223
195 0.02931223")

# One output row per original row of `df`: the ids are split into a numeric
# list column, expanded to one row per id, looked up in `barrier`, and then
# collapsed back per original row (tracked by `group`) while summing `Pass`.
results <- df %>%
  mutate(
    group = row_number(),
    barrier.id = lapply(strsplit(barrier.id, ","), as.numeric)
  ) %>%
  unnest(barrier.id) %>%
  left_join(barrier, by = c(barrier.id = "id")) %>%
  group_by(group) %>%
  summarise(
    barrier.id = paste(barrier.id, collapse = ","),
    pathway.pass = sum(Pass)
  )
group barrier.id pathway.pass
<int> <chr> <dbl>
1 1 24,108 0.298
2 2 24,108,167,148,195 0.433
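library(data.table)
library(tidyverse)
df <- fread(text="barrier.id pathway.pass
24,108 0.25000
24,108,167,148,195 0.03125")
barrier <- fread(text="id Pass
24 0.26894142
108 0.02931223
148 0.07585818
167 0.02931223
195 0.02931223")
results <- df %>%
mutate(barrier.id = strsplit(barrier.id,","), group = row_number()) %>%
unnest(barrier.id) %>%
mutate(barrier.id = as.numeric(barrier.id)) %>%
left_join(barrier, by = c("barrier.id"="id")) %>%
group_by(group) %>%
summarize(barrier.id = paste0(barrier.id,collapse=","), pathway.pass = sum(Pass))
group barrier.id pathway.pass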
1 1 24,108 0.298
2 2 24,108,167,148,195 0.433
这行吗
# Per-row sum of `Pass` for the comma-separated ids in `barrier.id`.
# Changes from the original one-liner, with the same resulting table:
#   * TRUE instead of T (T is an ordinary, reassignable variable);
#   * columns selected by name instead of by position (1, 3, 4), so the
#     pipeline does not depend on df having exactly two columns;
#   * the sum is named pathway.pass directly rather than renamed afterwards;
#   * .groups = "drop" states the ungrouping explicitly and silences the
#     "`summarise()` ungrouping output" message.
df %>%
  # ID remembers which original row each id came from.
  mutate(ID = row_number()) %>%
  # One row per id; type.convert() then turns the id strings into numbers
  # so they can be joined against barrier_h$id.
  separate_rows(barrier.id) %>%
  type.convert(as.is = TRUE) %>%
  left_join(barrier_h, by = c("barrier.id" = "id")) %>%
  select(barrier.id, ID, Pass) %>%
  group_by(ID) %>%
  summarise(
    barrier.id = toString(barrier.id),
    pathway.pass = sum(Pass),
    .groups = "drop"
  )
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 2 x 3
ID barrier.id pathway.pass
<int> <chr> <dbl>
1 1 24, 108 0.298
2 2 24, 108, 167, 148, 195 0.433
df %>% mutate(ID = row_number()) %>% separate_rows(barrier.id) %>% type.convert(as.is = T) %>%
left_join(barrier_h, by = c('barrier.id' = 'id')) %>% select(1,3,4) %>% group_by(ID) %>%
summarise(barrier.id = toString(barrier.id), Pass = sum(Pass)) %>% rename(pathway.pass = Pass)
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 2 x 3
ID barrier.id pathway.pass
1 1 24, 108 0.298
2 2 24, 108, 167, 148, 195 0.433
这行吗
# Per-row sum of `Pass` for the comma-separated ids in `barrier.id`.
# Changes from the original one-liner, with the same resulting table:
#   * TRUE instead of T (T is an ordinary, reassignable variable);
#   * columns selected by name instead of by position (1, 3, 4), so the
#     pipeline does not depend on df having exactly two columns;
#   * the sum is named pathway.pass directly rather than renamed afterwards;
#   * .groups = "drop" states the ungrouping explicitly and silences the
#     "`summarise()` ungrouping output" message.
df %>%
  # ID remembers which original row each id came from.
  mutate(ID = row_number()) %>%
  # One row per id; type.convert() then turns the id strings into numbers
  # so they can be joined against barrier_h$id.
  separate_rows(barrier.id) %>%
  type.convert(as.is = TRUE) %>%
  left_join(barrier_h, by = c("barrier.id" = "id")) %>%
  select(barrier.id, ID, Pass) %>%
  group_by(ID) %>%
  summarise(
    barrier.id = toString(barrier.id),
    pathway.pass = sum(Pass),
    .groups = "drop"
  )
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 2 x 3
ID barrier.id pathway.pass
<int> <chr> <dbl>
1 1 24, 108 0.298
2 2 24, 108, 167, 148, 195 0.433
df %>% mutate(ID = row_number()) %>% separate_rows(barrier.id) %>% type.convert(as.is = T) %>%
left_join(barrier_h, by = c('barrier.id' = 'id')) %>% select(1,3,4) %>% group_by(ID) %>%
summarise(barrier.id = toString(barrier.id), Pass = sum(Pass)) %>% rename(pathway.pass = Pass)
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 2 x 3
ID barrier.id pathway.pass
1 1 24, 108 0.298
2 2 24, 108, 167, 148, 195 0.433