R:如何对数据进行分组,并绘制不带多余条纹的比例条形图
请在下面的屏幕截图中查看输入数据和预期输出。但是,用下面的代码得到的当前绘图并不符合预期。我觉得我把事情弄得太复杂了,不过我还是把输入数据、预期输出和代码都分享出来,希望能得到帮助。主要有两个问题:1. 如果使用 mutate(),绘图上会出现不需要的条纹;如果使用 summarise(),各部分加起来又不到 100%。2. 如何提取最主要的贡献者(客户)?这两种做法我们都试过,但都卡住了。标签:r, dplyr, grouping, bar-chart, ggplotly
# Input data: 4 countries with 8 records each (32 rows in total).
df <- tibble(
  country = rep(c("India", "USA", "Germany", "Africa"), each = 8),
  type = c(
    # India
    "sms", "Other", "whatsapp", "web", "online", "shiny", "whatsapp", "whatsapp",
    # USA
    "sms", "sms", "sms", "web", "web", "Other", "online", "whatsapp",
    # Germany
    "sms", "Other", "whatsapp", "shiny", "online", "shiny", "whatsapp", "whatsapp",
    # Africa
    "sms", "sms", "sms", "shiny", "online", "Other", "online", "Other"
  ),
  # The four customers repeat in the same order over all 32 rows.
  cust = rep(c("google", "Apple", "wallmart", "pg"), times = 8),
  quantity = c(
    10, 20, 30, 40, 50, 60, 70, 80,
    90, 100, 15, 25, 35, 45, 55, 65,
    75, 85, 95, 105, 10, 15, 20, 25,
    30, 35, 40, 45, 50, 55, 60, 65
  )
)
# Without customer: total quantity per (country, type).
df_wo_cust <- df %>%
  group_by(country, type) %>%
  summarise(kpi_wo_cust = sum(quantity)) %>%
  ungroup()

# With customer: total quantity per (country, type, cust).
df_cust <- df %>%
  group_by(country, type, cust) %>%
  summarise(kpi_cust = sum(quantity)) %>%
  ungroup()

# Attach the (country, type) total to every customer-level row and inspect it.
df_combo <- left_join(df_cust, df_wo_cust, by = c("country", "type"))
glimpse(df_combo)
# Aggregated data for certain KPIs for the final plot.
# Adds to every (country, type, cust) row of df_combo:
#   kpi_cust_total      - sum of kpi_cust within the row's (country, type)
#   per_kpi_cust        - the row's share of that (country, type) total, in %
#   kpi_cust_uniq_total - sum of kpi_cust within the row's country
#   per_unq_kpi_cust    - the row's share of the country total, in %,
#                         rounded to 4 decimals
df_aggr <- df_combo %>%
  group_by(country, type) %>%
  mutate(
    kpi_cust_total = sum(kpi_cust),
    per_kpi_cust = 100 * kpi_cust / kpi_cust_total
  ) %>%
  group_by(country) %>%
  mutate(
    # sum() already returns a single value per group; no unique() is needed.
    kpi_cust_uniq_total = sum(kpi_cust),
    # round() is called explicitly on the finished percentage: `%>%` binds
    # tighter than `*`, so `100 * x %>% round(4)` would round x before scaling.
    per_unq_kpi_cust = round(100 * kpi_cust / kpi_cust_uniq_total, 4)
  ) %>%
  # Drop the grouping so later steps do not inherit it by accident.
  ungroup()
# Build the bar chart: country on x, fill by type, flipped to horizontal bars.
# This is the version that shows the reported problem: avg_kpi_cust is added
# with mutate(), so every customer row of df_aggr is kept, and a
# (country, type) group with several customers is drawn as several bar
# segments of the same fill.
plt = df_aggr %>% ungroup() %>%#glimpse()
# Attempt to obtain the top 2 customers (major contributors) within country
# and type. It is left commented out because it raises an error: summarise()
# below creates a column named `Cust`, but pull() asks for `cust`.
# group_by(country, type) %>%
# nest() %>%
# mutate(top_cust = purrr::map_chr(data, function(x){
# x %>% arrange(desc(per_kpi_cust)) %>%
# top_n(2,per_kpi_cust) %>%
# summarise(Cust = paste(cust,round(per_kpi_cust,2), collapse = "<br>")) %>%
# pull(cust)
# })#,data = NULL
# ) %>%
# unnest(cols = data) %>%
group_by(country, type) %>%
# If mutate() is used, undesired stripes appear on the plot.
# If summarise() is used instead, the bars do not add up to 100%.
# Note that mean() averages the customer shares of a (country, type) group
# rather than adding them up.
mutate(avg_kpi_cust = per_unq_kpi_cust %>% mean()) %>%
#summarise(avg_kpi_cust = per_unq_kpi_cust %>% mean()) %>%
ggplot(aes(x = country,
y = avg_kpi_cust,
fill = type,
# Per-row label with the rounded proportion and the country, using <br> as
# line break; presumably shown by ggplotly() as hover text.
text = paste('<br>proportion: ', round(avg_kpi_cust,2), "%",
"<br>country:",country
))) +
# stat = "identity" draws the y values as given instead of counting rows.
geom_bar(stat = "identity"#, position=position_dodge()
) +
coord_flip() +
theme_bw()
# Convert the ggplot object with ggplotly().
ggplotly(plt)
#输入数据
df%
按(国家、类型)分组%>%
总结(kpi_wo_cust=总和(数量))%>%
解组()->df_wo_cust
#与客户
df%>%
分组依据(国家、类型、客户)%>%
总结(kpi_cust=总和(数量))%>%
解组()->df_cust
df_组合%s()
#最终绘图特定KPI的聚合数据
df_累计百分比
按(国家、类型)分组%>%
变更(kpi_cust_总计=总和(kpi_cust),
每kpi客户=100*(kpi客户/kpi客户总计))%>%
按(国家)划分的组别%>%
#为了避免重复计数,选择unique()
mutate(kpi_cust_uniq_total=sum(kpi_cust)%%>%unique(),
每季度kpi客户=100*(kpi客户/kpi客户总计%>%四轮)
#
plt=df_aggr%%>%ungroup()%%>%
#为了在国家和类型内获得前2名客户(主要贡献者)
#但是,如果使用此代码,则会出现错误
#按(国家、类型)分组%>%
#嵌套()%>%
#mutate(top\u cust=purrr::map\u chr(数据,函数(x)){
#x%>%排列(描述(每个kpi客户))%>%
#排名靠前(每客户2名)%>%
#总结(Cust=粘贴(Cust,四舍五入(按kpi),折叠=“
”)%%
#拉(客户)
#})#,数据=NULL
# ) %>%
#unnest(cols=数据)%>%
按(国家、类型)分组%>%
#如果使用了mutate,则不需要的条纹会出现在绘图上
#如果使用了汇总,则不会增加到100%
变异(平均kpi客户=每季度kpi客户%>%mean())%%>%
#总结(平均kpi客户=每季度kpi客户%>%mean())%%>%
ggplot(aes(x=国家,
y=平均kpi客户,
填充=类型,
text=粘贴(“
比例:”,四舍五入(平均kpi客户数,2),“%”,
“
国家:”,国家
))) +
geom_条(stat=“identity”#,position=position_道奇()
) +
coord_flip()+
主题_bw()
ggplotly(plt)
关键是在 mutate() 之后使用 distinct(),而不是 summarise()。
此外,之前错误地使用了 mean() 而不是 sum(),这导致条形图不完整(各部分加起来不到 100%)。
library(tidyverse)
library(plotly)
# Input data: 32 rows, 8 consecutive rows for each of the 4 countries.
df <- tibble(
  country = rep(c("India", "USA", "Germany", "Africa"), each = 8),
  type = c(
    # India
    "sms", "Other", "whatsapp", "web", "online", "shiny", "whatsapp", "whatsapp",
    # USA
    "sms", "sms", "sms", "web", "web", "Other", "online", "whatsapp",
    # Germany
    "sms", "Other", "whatsapp", "shiny", "online", "shiny", "whatsapp", "whatsapp",
    # Africa
    "sms", "sms", "sms", "shiny", "online", "Other", "online", "Other"
  ),
  # Customers cycle google, Apple, wallmart, pg across all rows.
  cust = rep(c("google", "Apple", "wallmart", "pg"), times = 8),
  quantity = c(
    10, 20, 30, 40, 50, 60, 70, 80,
    90, 100, 15, 25, 35, 45, 55, 65,
    75, 85, 95, 105, 10, 15, 20, 25,
    30, 35, 40, 45, 50, 55, 60, 65
  )
)
# Quantity summed per (country, type), i.e. without the customer dimension.
df_wo_cust <- df %>%
  group_by(country, type) %>%
  summarise(kpi_wo_cust = sum(quantity)) %>%
  ungroup()

# Quantity summed per (country, type, cust), i.e. with the customer dimension.
df_cust <- df %>%
  group_by(country, type, cust) %>%
  summarise(kpi_cust = sum(quantity)) %>%
  ungroup()

# Customer-level rows joined with their (country, type) total, then printed.
df_combo <- left_join(df_cust, df_wo_cust, by = c("country", "type"))
glimpse(df_combo)
# Aggregated data for certain KPIs for the final plot.
# Adds to every (country, type, cust) row of df_combo:
#   kpi_cust_total      - sum of kpi_cust within the row's (country, type)
#   per_kpi_cust        - the row's share of that (country, type) total, in %
#   kpi_cust_uniq_total - sum of kpi_cust within the row's country
#   per_unq_kpi_cust    - the row's share of the country total, in %,
#                         rounded to 4 decimals
df_aggr <- df_combo %>%
  group_by(country, type) %>%
  mutate(
    kpi_cust_total = sum(kpi_cust),
    per_kpi_cust = 100 * kpi_cust / kpi_cust_total
  ) %>%
  group_by(country) %>%
  mutate(
    # sum() already returns a single value per group; no unique() is needed.
    kpi_cust_uniq_total = sum(kpi_cust),
    # round() is called explicitly on the finished percentage: `%>%` binds
    # tighter than `*`, so `100 * x %>% round(4)` would round x before scaling.
    per_unq_kpi_cust = round(100 * kpi_cust / kpi_cust_uniq_total, 4)
  ) %>%
  # Drop the grouping so later steps do not inherit it by accident.
  ungroup()
# Build the stacked bar chart with one segment per (country, type).
# df_aggr is collapsed to one row per (country, type) with a single
# summarise(), so no duplicate rows remain that would show up as stripes:
#   avg_kpi_cust - sum of the customers' shares of the country total, in %
#                  (the name is kept although the value is a sum, not a mean)
#   top_cust     - tooltip text naming the 2 customers with the largest share
#                  of the (country, type) total, separated by <br>
plt <- df_aggr %>%
  ungroup() %>%
  group_by(country, type) %>%
  # Largest contributor first inside each (country, type) group.
  arrange(desc(per_kpi_cust), .by_group = TRUE) %>%
  summarise(
    avg_kpi_cust = sum(per_unq_kpi_cust),
    # head() takes the first two rows of the sorted group; customers tied
    # beyond the second position are not listed.
    top_cust = paste(
      head(cust, 2),
      round(head(per_kpi_cust, 2), 2),
      collapse = "<br>"
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(
    x = country,
    y = avg_kpi_cust,
    fill = type,
    text = top_cust
  )) +
  # stat = "identity" draws the y values as given instead of counting rows.
  geom_bar(stat = "identity") +
  coord_flip() +
  theme_bw()

# Convert to plotly, passing only the custom `text` aesthetic as tooltip.
ggplotly(plt, tooltip = "text")
库(tidyverse)
图书馆(绘本)
#输入数据
df%
按(国家、类型)分组%>%
总结(kpi_wo_cust=总和(数量))%>%
解组()->df_wo_cust
#与客户
df%>%
分组依据(国家、类型、客户)%>%
总结(kpi_cust=总和(数量))%>%
解组()->df_cust
df_组合%s()
#最终绘图特定KPI的聚合数据
df_累计百分比
按(国家、类型)分组%>%
变更(kpi_cust_总计=总和(kpi_cust),
每kpi客户=100*(kpi客户/kpi客户总计))%>%
按(国家)划分的组别%>%
#为了避免重复计数,选择unique()
mutate(kpi_cust_uniq_total=sum(kpi_cust)%%>%unique(),
每季度kpi客户=100*(kpi客户/kpi客户总计%>%四轮)
plt=df_aggr%%>%ungroup()%%>%
#为了在国家/地区和类型内展示前2名客户(主要贡献者)
按(国家、类型)分组%>%
嵌套()%>%
mutate(top\u cust=purrr::map\u chr(数据,函数(x)){
x%>%排列(描述(每个kpi客户))%>%
排名靠前(每客户2名)%>%
总结(Cust=粘贴(Cust,四舍五入(按kpi),折叠=“
”)%%
拉(客户)
})) %>%
unnest(cols=数据)%>%
按(国家、类型)分组%>%
#如果使用了mutate,则不需要的条纹会出现在绘图上
#如果使用了汇总,则不会添加到100%。
#使用了如此独特的方法
变异(平均kpi客户=每季度kpi客户%>%sum())%%>%
解组()%>%
不同(国家/地区,类型,.keep_all=T)%>%
ggplot(aes(x=国家,
y=平均kpi客户,
填充=类型,
text=top\u cust
)) +
几何图形栏(stat=“identity”)+
coord_flip()+
主题_bw()
ggplotly(plt,tooltip=“text”)