Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/71.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
根据dplyr中的范围总结条件_R_Dplyr_Summarize - Fatal编程技术网

在 dplyr 中按范围条件进行汇总

根据dplyr中的范围总结条件,r,dplyr,summarize,R,Dplyr,Summarize,这是我的例子的一个例子。 样本数据: df <- data.frame(ID = c(1, 1, 2, 2, 3, 5), A = c("foo", "bar", "foo", "foo", "bar", "bar"), B = c(1, 5, 7, 23, 54, 202)) df ID A B 1 1 foo 1 2 1 bar 5 3 2 foo 7 4 2 foo 23 5 3 bar 54 6 5 bar 202 我使用pa

下面是我的问题的一个示例。样本数据:

 # Sample data: six rows with an ID, a label A and a numeric value B.
 df <- data.frame(
   ID = c(1, 1, 2, 2, 3, 5),
   A = c("foo", "bar", "foo", "foo", "bar", "bar"),
   B = c(1, 5, 7, 23, 54, 202)
 )

 # Print the data frame to show its contents.
 df
  ID   A   B
1  1 foo   1
2  1 bar   5
3  2 foo   7
4  2 foo  23
5  3 bar  54
6  5 bar 202
我使用 dplyr 包尝试了以下代码:

# Attempt from the question: per-ID row total plus, as intended, the number of
# rows whose B satisfies B >= 0 & B < 5.
# NOTE: n() takes no arguments, so the call n(B>=0 & B<5) below fails with an
# "unused argument" error.
df %>%
  group_by(ID) %>%
  summarize(count=n(), count_0_5 = n(B>=0 & B<5))
df %>%
  group_by(ID) %>%
  summarize(count=n(), count_0_5 = n(B>=0 & B<5))
Error in n(B>=0 & B<5) : unused argument (B>=0 & B<5)
可以将 n(B>=0 & B<5) 替换为 sum(B>=0 & B<5)。
library(dplyr)
library(tidyr)
df %>% group_by(ID) %>%
   mutate(B_cut = cut(B, c(0,5,10,15,20,1000), labels = c('count_0_5','count_5_10','count_10_15','count_15_20','count_20_1000')), count=n()) %>%
   group_by(ID,B_cut) %>% mutate(n=n()) %>% slice(1) %>% select(-A,-B) %>%
   spread(B_cut, n)
# 第二种选择
left_join(df %>% group_by(ID) %>% summarise(n=n()),
          df %>% mutate(B_cut = cut(B, c(0,5,10,15,20,1000), labels = c('count_0_5','count_5_10','count_10_15','count_15_20','count_20_1000'))) %>%
                 count(ID,B_cut) %>% spread(B_cut,n),
          by='ID')
# A tibble: 4 x 5
# Groups:   ID [4]
     ID count count_0_5 count_5_10 count_20_1000
  <dbl> <int>     <int>      <int>         <int>
1     1     2         2         NA            NA
2     2     2        NA          1             1
3     3     1        NA         NA             1
4     5     1        NA         NA             1

@Vojtěch Kania 抱歉,之前的答案有一个错误,请查看下面这个更新后的答案。
# Attempt from the question: per-ID row total plus, as intended, the number of
# rows whose B satisfies B >= 0 & B < 5.
# NOTE: n() takes no arguments, so the call n(B>=0 & B<5) below raises the
# "unused argument" error quoted right after this snippet.
df %>%
  group_by(ID) %>%
  summarize(count=n(), count_0_5 = n(B>=0 & B<5))
`Error in n(B>=0 & B<5) : 
  unused argument (B>=0 & B<5)`
library(dplyr)
library(tidyr)
# Count, for every ID, the total number of rows and the number of rows whose B
# falls into each range. Requires dplyr and tidyr (>= 1.1.0 for `names_sort`)
# and the sample data frame `df` with columns ID and B.

# Bin edges and the output column name for each bin, shared by both options.
bin_breaks <- c(0, 5, 10, 15, 20, 1000)
bin_labels <- c(
  "count_0_5", "count_5_10", "count_10_15", "count_15_20", "count_20_1000"
)

# Option 1: add the per-ID total, count rows per (ID, bin), then widen so that
# every bin becomes its own column.
df %>%
  # right = FALSE makes the bins left-closed, [lower, upper), which matches the
  # requested condition `B >= 0 & B < 5`. With the default right-closed bins,
  # B == 5 would be counted in count_0_5 and B == 0 would fall in no bin.
  mutate(B_cut = cut(B, bin_breaks, labels = bin_labels, right = FALSE)) %>%
  group_by(ID) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  count(ID, count, B_cut) %>%
  # names_sort = TRUE orders the new columns by bin instead of by first
  # appearance in the data.
  pivot_wider(names_from = B_cut, values_from = n, names_sort = TRUE)

# Option 2: compute the per-ID total and the per-bin counts separately, then
# join them on ID. Yields the same columns and values as option 1.
left_join(
  df %>% count(ID, name = "count"),
  df %>%
    mutate(B_cut = cut(B, bin_breaks, labels = bin_labels, right = FALSE)) %>%
    count(ID, B_cut) %>%
    pivot_wider(names_from = B_cut, values_from = n, names_sort = TRUE),
  by = "ID"
)

# Result for the sample data (printed by option 1). Bins that contain no rows
# for any ID get no column; a bin that is empty for one ID shows NA.
# # A tibble: 4 x 5
#      ID count count_0_5 count_5_10 count_20_1000
#   <dbl> <int>     <int>      <int>         <int>
# 1     1     2         1          1            NA
# 2     2     2        NA          1             1
# 3     3     1        NA         NA             1
# 4     5     1        NA         NA             1