Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/69.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 试图扩大数据帧,同时通过两列的因素进行交互_R_Dataframe_Dplyr_Pipe_Tidyr - Fatal编程技术网

R 试图扩大数据帧,同时通过两列的因素进行交互

R 试图扩大数据帧,同时通过两列的因素进行交互,r,dataframe,dplyr,pipe,tidyr,R,Dataframe,Dplyr,Pipe,Tidyr,这是我的可复制的示例数据框,我有一个年份栏和一个癌症栏,这两个都是因素,我试图在相互作用的同时扩大数据框。解释起来有点复杂,所以我展示了原始数据帧和最终数据帧。基本上,我是想把每年的癌症统计数据分开 理想情况下,这将通过管道实现,因为这是我更熟悉的,并且可以调整我的实际数据框架,即10年的水平和15种癌症水平,以及第二列中的10种水平,我希望对其进行相同的治疗 YR<-as.factor( c(2019,2018,2019,2019,2018,2018,2019,2019,2018)) S

这是我的可复现示例数据框。其中有一个年份列和一个癌症列,两者都是因子(factor),我想在让这两列交互组合的同时把数据框变宽(长表转宽表)。解释起来有点复杂,所以我同时给出了原始数据框和期望得到的最终数据框。基本上,我想把每一年的癌症统计数据分别列出来。

理想情况下,希望通过管道(pipe)来实现,因为我对这种写法更熟悉,而且便于调整后用于我的实际数据框:其中年份有 10 个水平,癌症有 15 个水平,另外还有一列含 10 个水平,我希望对它做同样的处理。

# Reproducible example data: one row per record. Year (YR), location
# (STATE, COUNTY), cancer type (CANCER) and rand_fact are factors; rand_num is
# numeric and rand_chr is character.
# The columns are built directly inside data.frame(), so no temporary global
# vectors are created that would need to be removed afterwards.
TEST_DR <- data.frame(
  YR = factor(c(2019, 2018, 2019, 2019, 2018, 2018, 2019, 2019, 2018)),
  STATE = factor(c("CA", "MA", "KY", "KY", "CA", "MA", "KY", "KY", "CA")),
  COUNTY = factor(c("C1", "M1", "K1", "K2", "C1", "M2", "K1", "K2", "C1")),
  CANCER = factor(c(
    "Cervical", "Lung", "Prostate", "Breast", "Cervical",
    "Breast", "Prostate", "Prostate", "Lung"
  )),
  rand_fact = factor(c(
    "rf1", "rf2", "rf3", "fr4", "fr5", "rf2", "rf3", "fr4", "fr5"
  )),
  rand_num = c(4, 3, 5, 7, 3, 5, 3, 24, 9),
  rand_chr = c("a", "d", "r", "e", "g", "y", "r", "e", "k")
)

# Sort by year, then county, using base R so this snippet runs without any
# package being attached. Rows that tie on both keys keep their input order.
TEST_DR <- TEST_DR[order(TEST_DR$YR, TEST_DR$COUNTY), ]
# Subsetting keeps the old row names; reset them to 1..n.
rownames(TEST_DR) <- NULL

> print(TEST_DR)
  YR STATE COUNTY   CANCER rand_fact rand_num rand_chr
2018    CA     C1 Cervical       fr5        3        g
2018    CA     C1     Lung       fr5        9        k
2018    MA     M1     Lung       rf2        3        d
2018    MA     M2   Breast       rf2        5        y
2019    CA     C1 Cervical       rf1        4        a
2019    KY     K1 Prostate       rf3        5        r
2019    KY     K1 Prostate       rf3        3        r
2019    KY     K2   Breast       fr4        7        e
2019    KY     K2 Prostate       fr4       24        e

这里有一个 tidyverse 解决方案:

library(tidyr)
library(dplyr)

# Count records per year / state / county / cancer type, append one "total"
# row per state-county-year, then spread the counts into one column per
# cancer-year combination and add an overall total per county.
TEST_DR %>%
  count(YR, STATE, COUNTY, CANCER, name = "value") %>%
  # CANCER is a factor here; store it as character so the literal "total"
  # label inserted below has the same type as the key column it joins on.
  mutate(CANCER = as.character(CANCER)) %>%
  rows_insert(
    # `(.)` is the counted table from the previous step. The parentheses make
    # magrittr pipe that value instead of treating `. %>% ...` as a function.
    (.) %>%
      group_by(STATE, COUNTY, YR) %>%
      # `.groups = "drop"` returns an ungrouped table and avoids the
      # "regrouping output" message.
      summarise(CANCER = "total", value = sum(value), .groups = "drop"),
    by = c("STATE", "COUNTY", "YR", "CANCER")
  ) %>%
  # Order by year, then by first appearance of each cancer label; the
  # inserted "total" rows come last, so "total" sorts after the cancer types.
  arrange(YR, factor(CANCER, levels = unique(CANCER))) %>%
  pivot_wider(
    id_cols = c(STATE, COUNTY),
    names_from = c(CANCER, YR),
    values_from = value,
    values_fn = sum,
    # Combinations that never occur get a count of zero rather than NA.
    values_fill = 0L
  ) %>%
  # Sum the per-year total_* columns into a single overall total.
  mutate(total_cancer = rowSums(across(starts_with("total"))))
输出

`summarise()` regrouping output by 'STATE', 'COUNTY' (override with `.groups` argument)
# A tibble: 5 x 11
  STATE COUNTY Cervical_2018 Lung_2018 Breast_2018 total_2018 Cervical_2019 Breast_2019 Prostate_2019 total_2019 total_cancer
  <fct> <fct>          <int>     <int>       <int>      <int>         <int>       <int>         <int>      <int>        <dbl>
1 CA    C1                 1         1           0          2             1           0             0          1            3
2 MA    M1                 0         1           0          1             0           0             0          0            1
3 MA    M2                 0         0           1          1             0           0             0          0            1
4 KY    K2                 0         0           0          0             0           1             1          2            2
5 KY    K1                 0         0           0          0             0           0             2          2            2

这真的很有帮助,谢谢!你有什么办法可以同时计算总数吗?

嗨 @JasonDeutsch,请看我更新后的回答。
`summarise()` regrouping output by 'STATE', 'COUNTY' (override with `.groups` argument)
# A tibble: 5 x 11
  STATE COUNTY Cervical_2018 Lung_2018 Breast_2018 total_2018 Cervical_2019 Breast_2019 Prostate_2019 total_2019 total_cancer
  <fct> <fct>          <int>     <int>       <int>      <int>         <int>       <int>         <int>      <int>        <dbl>
1 CA    C1                 1         1           0          2             1           0             0          1            3
2 MA    M1                 0         1           0          1             0           0             0          0            1
3 MA    M2                 0         0           1          1             0           0             0          0            1
4 KY    K2                 0         0           0          0             0           1             1          2            2
5 KY    K1                 0         0           0          0             0           0             2          2            2
library(reshape2)
library(dplyr)

# Same result using reshape2::dcast() for the reshape step: count records,
# append one "total" row per state-county-year, cast to wide form and add an
# overall total per county.
TEST_DR %>%
  count(YR, STATE, COUNTY, CANCER, name = "value") %>%
  # CANCER is a factor here; store it as character so the literal "total"
  # label inserted below has the same type as the key column it joins on.
  mutate(CANCER = as.character(CANCER)) %>%
  rows_insert(
    # `(.)` is the counted table from the previous step. The parentheses make
    # magrittr pipe that value instead of treating `. %>% ...` as a function.
    (.) %>%
      group_by(STATE, COUNTY, YR) %>%
      # `.groups = "drop"` returns an ungrouped table and avoids the
      # "regrouping output" message.
      summarise(CANCER = "total", value = sum(value), .groups = "drop"),
    by = c("STATE", "COUNTY", "YR", "CANCER")
  ) %>%
  arrange(YR, factor(CANCER, levels = unique(CANCER))) %>%
  # Name the aggregation function and the value column explicitly instead of
  # relying on argument position and on dcast() guessing the value column.
  dcast(
    STATE + COUNTY ~ CANCER + YR,
    fun.aggregate = sum,
    value.var = "value"
  ) %>%
  # Sum the per-year total_* columns into a single overall total.
  mutate(total_cancer = rowSums(across(starts_with("total"))))