dplyr:获取不同级别的面积和面积分布
数据dplyr:获取不同级别的面积和面积分布,r,dplyr,R,Dplyr,数据 df <- read.csv(url("https://www.dropbox.com/s/uaivja22czx2pe8/df_stats_question.csv?raw=1")) 我想做什么? #I got the percent of area for each zone like below df1 <- df %>% dplyr::select(ID, param1, param2, area) %>% dplyr::arrange(ID,
# Load the example data set directly from the shared Dropbox link.
df <- read.csv(
  file = url("https://www.dropbox.com/s/uaivja22czx2pe8/df_stats_question.csv?raw=1")
)
我想做什么?
# Percent of each zone's total area taken up by every (param1, param2) pair.
df1 <- df %>%
  dplyr::select(ID, param1, param2, area) %>%
  dplyr::arrange(ID, param1, param2) %>%
  # Total area of each (ID, param1, param2) combination.
  dplyr::group_by(ID, param1, param2) %>%
  dplyr::summarise(area = sum(area)) %>%
  # Regroup by zone so the share is taken against that zone's total area.
  dplyr::group_by(ID) %>%
  dplyr::mutate(percent_area = prop.table(area) * 100)
head(df1, n = 6L)
# ID param1 param2 area percent_area
# <fctr> <fctr> <fctr> <dbl> <dbl>
#1 Zone1 High High 1247.26891 1.60636374
#2 Zone1 High Low 4725.57502 6.08609125
#3 Zone1 High Medium 10.06087 0.01295744
#4 Zone1 Low High 1432.38859 1.84478029
#5 Zone1 Medium High 44907.15570 57.83614608
#6 Zone1 Medium Low 22036.19702 28.38052622
对于每个分区(Zone1 到 Zone5),我想得到 param1 和 param2 的不同组合占分区总面积的百分比,
以及该面积在 EVT_mod 各级别上的百分比分布。
示例输出
#I want the output to be as below
#ID param1 param2 percent_area 0-15 15-30 30-60 >60
#zone1 High High 10 2 3 4 1
#zone1 High Medium 5 0.5 2 0.5 2
#zone1 High Low 15 3 4 5 3
#zone1 Medium High 9 3 2 3 1
#zone1 Medium Medium 11 2 3 4 2
#zone1 Medium Low 8 0.7 0.3 3 4
#zone1 Low High 7 0.9 1.1 3 2
#zone1 Low Medium 23 8 7 5 3
#zone1 Low Low 12 7 2 1 2
我做了什么?
# Percent of each zone's total area taken up by every (param1, param2) pair.
df1 <- df %>%
  dplyr::select(ID, param1, param2, area) %>%
  dplyr::arrange(ID, param1, param2) %>%
  # Total area of each (ID, param1, param2) combination.
  dplyr::group_by(ID, param1, param2) %>%
  dplyr::summarise(area = sum(area)) %>%
  # Regroup by zone so the share is taken against that zone's total area.
  dplyr::group_by(ID) %>%
  dplyr::mutate(percent_area = prop.table(area) * 100)
head(df1, n = 6L)
# ID param1 param2 area percent_area
# <fctr> <fctr> <fctr> <dbl> <dbl>
#1 Zone1 High High 1247.26891 1.60636374
#2 Zone1 High Low 4725.57502 6.08609125
#3 Zone1 High Medium 10.06087 0.01295744
#4 Zone1 Low High 1432.38859 1.84478029
#5 Zone1 Medium High 44907.15570 57.83614608
#6 Zone1 Medium Low 22036.19702 28.38052622
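#我得到了每个分区的面积百分比,如下所示
df1 <- df %>%
dplyr::select(ID, param1, param2, area) %>%
dplyr::arrange(ID, param1, param2) %>%
dplyr::group_by(ID, param1, param2) %>%
dplyr::summarise(area = sum(area)) %>%
dplyr::group_by(ID) %>%
dplyr::mutate(percent_area = area/sum(area) * 100)
head(df1)
# ID param1 param2 area percent_area
# <fctr> <fctr> <fctr> <dbl> <dbl>
#1 Zone1 High High 1247.26891 1.60636374
#2 Zone1 High Low 4725.57502 6.08609125
#3 Zone1 High Medium 10.06087 0.01295744
#4 Zone1 Low High 1432.38859 1.84478029
#5 Zone1 Medium High 44907.15570 57.83614608
#6 Zone1 Medium Low 22036.19702 28.38052622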
问题
对于如何获得每个 EVT_mod 级别的面积百分比分布,如有任何建议,我将不胜感激。
这样做如何?分组时把 EVT_mod 也加进去,然后将其展开成列,最后得到类似下面的结果。
首先,我更改这一行:
# Bin EVT into four classes: <= 15, (15, 30], (30, 60] and > 60.
# cut() returns a factor, so convert back to character to keep the column
# type that the nested ifelse() form produced.
df <- df %>%
  mutate(EVT_mod = as.character(cut(
    EVT,
    breaks = c(-Inf, 15, 30, 60, Inf),
    labels = c("cat1", "cat2", "cat3", "cat4"),
    right = TRUE,
    include.lowest = TRUE
  )))
#来源:本地数据帧 [61 x 9]
#分组:ID [5]
#
# ID param1 param2 cat1 cat2 cat3 cat4 area percent_area
# <fctr> <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 Zone1 High High 1.34705031 0.25931343 0.00000000 0 1247.26891 1.60636374
# 2 Zone1 High Low 5.59184841 0.49424283 0.00000000 0 4725.57502 6.08609125
# 3 Zone1 High Medium 0.01262533 0.00033211 0.00000000 0 10.06087 0.01295744
# 4 Zone1 Low High 1.84478029 0.00000000 0.00000000 0 1432.38859 1.84478029
# 5 Zone1 Medium High 56.31313681 1.52300927 0.00000000 0 44907.15570 57.83614608
# 6 Zone1 Medium Low 18.64165645 9.73886978 0.00000000 0 22036.19702 28.38052622
# 7 Zone1 Medium Medium 4.06436687 0.16876810 0.00000000 0 3286.83815 4.23313497
# 8 Zone2 High High 30.03120766 10.13084134 0.01099552 0 11522.80578 40.17304453
# 9 Zone2 High Low 6.91574950 1.58340654 0.04628919 0 2451.08397 8.54544522
# 10 Zone2 High Medium 0.88955660 0.05981439 0.00000000 0 272.30741 0.94937100
# # ... 还有 51 行
# For every zone: percent of the zone's area per (param1, param2) combination,
# plus how that percentage splits across the four EVT classes.
df %>%
  select(ID, param1, param2, area, EVT_mod) %>%
  # Area of every (ID, param1, param2, EVT_mod) cell.
  group_by(ID, param1, param2, EVT_mod) %>%
  summarise(area = sum(area)) %>%
  # One column per EVT class; combinations absent from the data get zero area.
  tidyr::spread(key = EVT_mod, value = area, fill = 0) %>%
  # Total area of the combination across the four class columns.
  # NOTE(review): relies on the data still being grouped by
  # (ID, param1, param2) here, so each group is a single row - confirm.
  mutate(area = sum(c(cat1, cat2, cat3, cat4))) %>%
  group_by(ID) %>%
  # zone_area is a temporary helper holding the zone's total area.
  mutate(
    zone_area = sum(area),
    cat1 = cat1 / zone_area * 100,
    cat2 = cat2 / zone_area * 100,
    cat3 = cat3 / zone_area * 100,
    cat4 = cat4 / zone_area * 100,
    percent_area = area / zone_area * 100
  ) %>%
  select(-zone_area) %>%
  arrange(ID, param1, param2)
# Source: local data frame [61 x 9]
# Groups: ID [5]
#
# ID param1 param2 cat1 cat2 cat3 cat4 area percent_area
# <fctr> <fctr> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 Zone1 High High 1.34705031 0.25931343 0.00000000 0 1247.26891 1.60636374
# 2 Zone1 High Low 5.59184841 0.49424283 0.00000000 0 4725.57502 6.08609125
# 3 Zone1 High Medium 0.01262533 0.00033211 0.00000000 0 10.06087 0.01295744
# 4 Zone1 Low High 1.84478029 0.00000000 0.00000000 0 1432.38859 1.84478029
# 5 Zone1 Medium High 56.31313681 1.52300927 0.00000000 0 44907.15570 57.83614608
# 6 Zone1 Medium Low 18.64165645 9.73886978 0.00000000 0 22036.19702 28.38052622
# 7 Zone1 Medium Medium 4.06436687 0.16876810 0.00000000 0 3286.83815 4.23313497
# 8 Zone2 High High 30.03120766 10.13084134 0.01099552 0 11522.80578 40.17304453
# 9 Zone2 High Low 6.91574950 1.58340654 0.04628919 0 2451.08397 8.54544522
# 10 Zone2 High Medium 0.88955660 0.05981439 0.00000000 0 272.30741 0.94937100
# # ... with 51 more rows