dplyr：获取不同级别的面积和面积分布_R_Dplyr

dplyr：获取不同级别的面积和面积分布

dplyr：获取不同级别的面积和面积分布,r,dplyr,R,Dplyr,数据 df <- read.csv(url("https://www.dropbox.com/s/uaivja22czx2pe8/df_stats_question.csv?raw=1")) 我想做什么？ #I got the percent of area for each zone like below df1 <- df %>% dplyr::select(ID, param1, param2, area) %>% dplyr::arrange(ID,

数据

df <- read.csv(url("https://www.dropbox.com/s/uaivja22czx2pe8/df_stats_question.csv?raw=1"))

我想做什么？

#I got the percent of area for each zone like below
df1 <- df %>% 
  dplyr::select(ID, param1, param2, area) %>% 
  dplyr::arrange(ID, param1, param2) %>% 
  dplyr::group_by(ID, param1, param2) %>%
  dplyr::summarise(area = sum(area)) %>% 
  dplyr::group_by(ID) %>% 
  dplyr::mutate(percent_area = area/sum(area) * 100)

head(df1)
#      ID param1 param2        area percent_area
#  <fctr> <fctr> <fctr>       <dbl>        <dbl>
#1  Zone1   High   High  1247.26891   1.60636374
#2  Zone1   High    Low  4725.57502   6.08609125
#3  Zone1   High Medium    10.06087   0.01295744
#4  Zone1    Low   High  1432.38859   1.84478029
#5  Zone1 Medium   High 44907.15570  57.83614608
#6  Zone1 Medium    Low 22036.19702  28.38052622

对于每个分区（

Zone1

到

Zone5

），我想得到

param1

和

param2

的不同组合占分区总面积的百分比以及

EVT\u mod

示例输出

#I want the output to be as below
#ID      param1   param2    percent_area      0-15  15-30  30-60   >60 
#zone1   High     High      10                2     3      4       1
#zone1   High     Medium    5                 0.5   2      0.5     2
#zone1   High     Low       15                3     4      5       3
#zone1   Medium   High      9                 3     2      3       1
#zone1   Medium   Medium    11                2     3      4       2
#zone1   Medium   Low       8                 0.7   0.3    3       4
#zone1   Low      High      7                 0.9   1.1    3       2
#zone1   Low      Medium    23                8     7      5       3
#zone1   Low      Low       12                7     2      1       2

我做了什么？

#I got the percent of area for each zone like below
df1 <- df %>% 
  dplyr::select(ID, param1, param2, area) %>% 
  dplyr::arrange(ID, param1, param2) %>% 
  dplyr::group_by(ID, param1, param2) %>%
  dplyr::summarise(area = sum(area)) %>% 
  dplyr::group_by(ID) %>% 
  dplyr::mutate(percent_area = area/sum(area) * 100)

head(df1)
#      ID param1 param2        area percent_area
#  <fctr> <fctr> <fctr>       <dbl>        <dbl>
#1  Zone1   High   High  1247.26891   1.60636374
#2  Zone1   High    Low  4725.57502   6.08609125
#3  Zone1   High Medium    10.06087   0.01295744
#4  Zone1    Low   High  1432.38859   1.84478029
#5  Zone1 Medium   High 44907.15570  57.83614608
#6  Zone1 Medium    Low 22036.19702  28.38052622

#我得到了每个区域的面积百分比，如下所示
df1%
dplyr:：选择（ID，参数1，参数2，区域）%>%
dplyr:：排列（ID，参数1，参数2）%>%
dplyr:：分组依据（ID，参数1，参数2）%>%
dplyr:：汇总（面积=总和（面积））%>%
dplyr:：分组依据（ID）%>%
dplyr：：突变（面积百分比=面积/总面积）*100）
主管（df1）
#ID参数1参数2面积百分比\面积
#                   
#1区1高-高1247.26891 1.60636374
#2区1高-低4725.57502 6.08609125
#3区1高中10.06087 0.01295744
#4第1区低-高1432.38859 1.84478029
#5区1中高44907.15570 57.83614608
#6区1中低22036.19702 28.38052622

问题

对于如何获得每个

EVT\u mod

级别的

面积百分比分布的任何建议，我们将不胜感激。
这方面如何？首先也是通过EVT_mod
进行分组，然后在列上展开，然后我们以类似的内容结束
首先，我更改这一行：
df <- df %>% 
  mutate(EVT_mod = ifelse (EVT <= 15, 'cat1', 
                           ifelse(EVT <= 30, 'cat2',
                                  ifelse(EVT <= 60, 'cat3', 'cat4'))))


#来源：本地数据帧[61 x 9]
#组别:ID[5]
# 
#ID参数1参数2 cat1 cat2 cat3 cat4面积百分比
#                                          
#1区1高-高1.34705031 0.25931343 0.00000000 0 1247.26891 1.60636374
#2区1高低5.59184841 0.49424283 0.00000000 0 4725.57502 6.08609125
#3区1高中0.01262533 0.00033211 0.00000000 0 10.06087 0.01295744
#4区1低-高1.84478029 0.00000000 0.00000000 0 1432.38859 1.84478029
#5区1中高56.31313681 1.52300927 0.00000000 0 44907.15570 57.83614608
#6区1中低18.64165645 9.73886978 0.00000000 0 22036.19702 28.38052622
#7第1区中型4.06436687 0.16876810 0.00000000 0 3286.83815 4.23313497
#8区2高-高30.03120766 10.13084134 0.01099552 0 11522.80578 40.17304453
#9区2高-低6.91574950 1.58340654 0.04628919 0 2451.08397 8.54544522
#10区2高中0.88955660 0.05981439 0.00000000 0 272.30741 0.94937100
# # ... 还有51行

df %>% 
  select(ID, param1, param2, area, EVT_mod) %>%
  group_by(ID, param1, param2, EVT_mod) %>%
  summarise(area = sum(area)) %>% 
  tidyr::spread(EVT_mod, area, fill = 0) %>% 
  mutate(area = sum(c(cat1, cat2, cat3, cat4))) %>% 
  group_by(ID) %>% 
  mutate(cat1 = cat1 / sum(area) * 100,
         cat2 = cat2 / sum(area) * 100,
         cat3 = cat3 / sum(area) * 100,
         cat4 = cat4 / sum(area) * 100,
         percent_area = area / sum(area) * 100) %>% 
  arrange(ID, param1, param2)

# Source: local data frame [61 x 9]
# Groups: ID [5]
# 
#        ID param1 param2        cat1        cat2       cat3  cat4        area percent_area
#    <fctr> <fctr> <fctr>       <dbl>       <dbl>      <dbl> <dbl>       <dbl>        <dbl>
# 1   Zone1   High   High  1.34705031  0.25931343 0.00000000     0  1247.26891   1.60636374
# 2   Zone1   High    Low  5.59184841  0.49424283 0.00000000     0  4725.57502   6.08609125
# 3   Zone1   High Medium  0.01262533  0.00033211 0.00000000     0    10.06087   0.01295744
# 4   Zone1    Low   High  1.84478029  0.00000000 0.00000000     0  1432.38859   1.84478029
# 5   Zone1 Medium   High 56.31313681  1.52300927 0.00000000     0 44907.15570  57.83614608
# 6   Zone1 Medium    Low 18.64165645  9.73886978 0.00000000     0 22036.19702  28.38052622
# 7   Zone1 Medium Medium  4.06436687  0.16876810 0.00000000     0  3286.83815   4.23313497
# 8   Zone2   High   High 30.03120766 10.13084134 0.01099552     0 11522.80578  40.17304453
# 9   Zone2   High    Low  6.91574950  1.58340654 0.04628919     0  2451.08397   8.54544522
# 10  Zone2   High Medium  0.88955660  0.05981439 0.00000000     0   272.30741   0.94937100
# # ... with 51 more rows