R 按类别将x轴分组

R 按类别将x轴分组,r,ggplot2,R,Ggplot2,这是我的数据框架 df <- structure(list(variable = c("B.Al", "OA.P", "NDVI_10", "OA.Al", "tpi2000.MEAN", "solar_rad_total_20m", "B.Al", "TAS_mean.MEAN", "solar_rad_total_

这是我的数据框(data frame):

# Example data for the question: a 120-row data frame with four columns.
#   variable            - full predictor name (character)
#   variable_importance - numeric importance score for that row
#   variable_short      - shortened label for the predictor, used on the x axis
#   class               - predictor category: "soil", "spectral" or "topo"
# Each predictor appears in several rows, so the plots below draw one box per
# variable_short value.
df <- structure(list(variable = c("B.Al", "OA.P", "NDVI_10", "OA.Al", 
"tpi2000.MEAN", "solar_rad_total_20m", "B.Al", "TAS_mean.MEAN", 
"solar_rad_total_20m", "OA.pH", "tpi25.MEAN", "twi_dd.MEAN", 
"B.Al", "B.Ca", "TAS_slope.MEAN", "B.Ca", "NDWI_10", "TAS_slope.MEAN", 
"B.Ca", "OA.Ca", "TAS_slope.MEAN", "OA.Al", "B.Ca", "twi_dd.MEAN", 
"TAS_mean.MEAN", "tpi2000.MEAN", "twi_dd.STD", "OA.Ca", "OA.pH", 
"TAS_mean.MEAN", "OA.Ca", "tpi25.MEAN", "solar_rad_total_20m", 
"NDVI_10", "twi_dd.MEAN", "twi_dd.STD", "B.Ca", "B.Na", "tpi2000.MEAN", 
"OA.Na", "NDVI_10", "TAS_slope.MEAN", "B.Al", "tpi2000.MEAN", 
"NDVI_10", "TAS_mean.MEAN", "TAS_northness.MEAN", "solar_rad_total_20m", 
"OA.P", "TAS_mean.MEAN", "tpi2000.MEAN", "OA.Na", "OA.P", "NDVI_10", 
"B.Ca", "TAS_northness.MEAN", "tpi2000.MEAN", "OA.Al", "B.C_N", 
"TAS_mean.MEAN", "OA.Na", "tpi2000.MEAN", "twi_dd.MEAN", "OA.P", 
"OA.pH", "NDWI_10", "B.Ca", "OA.depth", "TAS_slope.MEAN", "OA.Al", 
"OA.Ca", "NDVI_10", "OA.Na", "OA.depth", "tpi25.MEAN", "B.Na", 
"TAS_slope.MEAN", "NDWI_10", "B.Ca", "OA.Na", "NDWI_10", "TAS_slope.MEAN", 
"OA.P", "twi_dd.MEAN", "B.P", "B.C", "twi_dd.STD", "OA.Na", "OA.P", 
"twi_dd.STD", "B.Al", "MCARI_MTVI", "TAS_mean.MEAN", "B.Al", 
"B.Ca", "B.P", "B.C_N", "TAS_slope.MEAN", "twi_dd.MEAN", "OA.Al", 
"TAS_mean.MEAN", "tpi2000.MEAN", "B.Al", "B.Ca", "NDWI_10", "B.Al", 
"B.Na", "tpi2000.MEAN", "OA.depth", "TAS_mean.MEAN", "TAS_northness.MEAN", 
"B.C_N", "TAS_mean.MEAN", "NDWI_10", "B.Na", "TAS_slope.MEAN", 
"twi_dd.STD", "B.Ca", "TAS_mean.MEAN", "NDWI_10"), variable_importance = c(0.0583456, 
0.0572622, 0.7949162, 0.145154, 0.1965898, 0.631507, 0.0319048, 
0.9834534, 0.0105422, 0.07857, 0.3157312, 0.403983, 0.095685, 
0.8925714, 0.0548878, 0.5588186, 0.0733602, 0.526027, 0.9339486, 
0.2531884, 0.048884, 0.123377, 0.6073132, 0.2345292, 0.66771, 
0.21304, 0.0367912, 0.2241128, 0.2298776, 0.5071346, 0.259179, 
0.6296734, 0.1123266, 0.3318268, 0.1044384, 0.5294008, 0.4846202, 
0.0590374, 0.4674416, 0.2007248, 0.2541912, 0.3864322, 0.1323852, 
0.3674916, 0.6370222, 0.9318416, 0.0174854, 0.0552058, 0.1484992, 
0.7697134, 0.213332, 0.2768872, 0.1104194, 0.612905, 0.8139634, 
0.0905556, 0.0680632, 0.071293, 0.1307058, 0.7604958, 0.2264404, 
0.4453206, 0.3187728, 0.4391702, 0.1647728, 0.396783, 0.8386238, 
0.1099, 0.1145692, 0.9350212, 0.0378414, 0.0344502, 0.2856692, 
0.260972, 0.4203974, 0.114788, 0.675816, 0.173443, 0.934695, 
0.0927296, 0.0231832, 0.7535372, 0.1556188, 0.0567598, 0.1814224, 
0.1409008, 0.6511174, 0.565503, 0.4724184, 0.0136072, 0.3129622, 
0.129463, 0.4748478, 0.2831364, 0.6665722, 0.0824932, 0.0504342, 
0.9397376, 0.0456134, 0.233926, 0.567116, 0.1146926, 0.4097234, 
0.2708894, 0.2494134, 0.1986246, 0.1362926, 0.7396076, 0.0500134, 
0.9081236, 0.0989256, 0.0865266, 0.7041882, 0.1453008, 0.1198452, 
0.8600394, 0.1203448, 0.4742014, 0.2307082, 0.1815164), variable_short = c("B.Al", 
"OA.P", "NDVI", "OA.Al", "tpi2000", "solar_rad", "B.Al", "elev.", 
"solar_rad", "OA.pH", "tpi25", "twi", "B.Al", "B.Ca", "slope", 
"B.Ca", "NDWI", "slope", "B.Ca", "OA.Ca", "slope", "OA.Al", "B.Ca", 
"twi", "elev.", "tpi2000", "twi_st.d", "OA.Ca", "OA.pH", "elev.", 
"OA.Ca", "tpi25", "solar_rad", "NDVI", "twi", "twi_st.d", "B.Ca", 
"B.Na", "tpi2000", "OA.Na", "NDVI", "slope", "B.Al", "tpi2000", 
"NDVI", "elev.", "northness", "solar_rad", "OA.P", "elev.", "tpi2000", 
"OA.Na", "OA.P", "NDVI", "B.Ca", "northness", "tpi2000", "OA.Al", 
"B.C_N", "elev.", "OA.Na", "tpi2000", "twi", "OA.P", "OA.pH", 
"NDWI", "B.Ca", "OA.depth", "slope", "OA.Al", "OA.Ca", "NDVI", 
"OA.Na", "OA.depth", "tpi25", "B.Na", "slope", "NDWI", "B.Ca", 
"OA.Na", "NDWI", "slope", "OA.P", "twi", "B.P", "B.C", "twi_st.d", 
"OA.Na", "OA.P", "twi_st.d", "B.Al", "MCARI_MTVI", "elev.", "B.Al", 
"B.Ca", "B.P", "B.C_N", "slope", "twi", "OA.Al", "elev.", "tpi2000", 
"B.Al", "B.Ca", "NDWI", "B.Al", "B.Na", "tpi2000", "OA.depth", 
"elev.", "northness", "B.C_N", "elev.", "NDWI", "B.Na", "slope", 
"twi_st.d", "B.Ca", "elev.", "NDWI"), class = c("soil", "soil", 
"spectral", "soil", "topo", "topo", "soil", "topo", "topo", "soil", 
"topo", "topo", "soil", "soil", "topo", "soil", "spectral", "topo", 
"soil", "soil", "topo", "soil", "soil", "topo", "topo", "topo", 
"topo", "soil", "soil", "topo", "soil", "topo", "topo", "spectral", 
"topo", "topo", "soil", "soil", "topo", "soil", "spectral", "topo", 
"soil", "topo", "spectral", "topo", "topo", "topo", "soil", "topo", 
"topo", "soil", "soil", "spectral", "soil", "topo", "topo", "soil", 
"soil", "topo", "soil", "topo", "topo", "soil", "soil", "spectral", 
"soil", "soil", "topo", "soil", "soil", "spectral", "soil", "soil", 
"topo", "soil", "topo", "spectral", "soil", "soil", "spectral", 
"topo", "soil", "topo", "soil", "soil", "topo", "soil", "soil", 
"topo", "soil", "spectral", "topo", "soil", "soil", "soil", "soil", 
"topo", "topo", "soil", "topo", "topo", "soil", "soil", "spectral", 
"soil", "soil", "topo", "soil", "topo", "topo", "soil", "topo", 
"spectral", "soil", "topo", "topo", "soil", "topo", "spectral"
)), row.names = c(NA, -120L), class = "data.frame")
下面是绘图结果:

我想重新排列x轴,使同一类别(class)的变量排在一起,即红色的全部在一起、蓝色的在一起、绿色的在一起,而不是按字母顺序排列

我找到了在 aes() 中使用 reorder() 按数值对x轴排序的很好的解决方案,但很惊讶没有找到关于如何按类别排列x轴的好文档。我尝试用 reorder() 按组排列x轴,但没有成功

我知道可以使用类似下面的写法(出处:Gavin Simpson 的回答):

# Manual alternative: list every level by hand in the desired axis order.
# "...." stands in for that full, hand-written vector of level names.
df[["variable_short"]] <- factor(
  x = df[["variable_short"]],
  levels = c("....")
)
但似乎应该有一个更优雅的解决方案


reorder() 能否用于按组排序,或者是否有类似的优雅解决方案?

使用此解决方案,您可以在不使用 dplyr 管道的情况下嵌套调用两次 reorder()(双重排序)

# Order the x axis in two passes, without dplyr:
#   inner reorder(): levels sorted by descending mean importance
#   outer reorder(): levels re-sorted by class (as an integer code); levels of
#     the same class tie on that code and keep the inner ordering
# The ordering expression lives inside aes(), so the axis is labelled
# explicitly; otherwise the axis title is the whole deparsed reorder() call.
ggplot(
  df,
  aes(
    x = reorder(
      reorder(variable_short, -variable_importance, mean),
      as.numeric(factor(class))
    ),
    y = variable_importance
  )
) +
  # Outliers are hidden because the raw points are drawn on top anyway.
  geom_boxplot(aes(colour = class), outlier.colour = NA) +
  geom_point(aes(group = class), position = position_dodge(width = 0.75)) +
  labs(x = "variable_short")
使用中位数(median)可能在视觉上更吸引人,因为这样箱线图的中线会依次排列

# Same two-pass ordering as the mean-based version, but the inner reorder()
# ranks levels by descending median, which matches the line drawn inside each
# box. The outer reorder() then groups levels by class, keeping the inner
# ordering within a class.
# The ordering expression lives inside aes(), so the axis is labelled
# explicitly; otherwise the axis title is the whole deparsed reorder() call.
ggplot(
  df,
  aes(
    x = reorder(
      reorder(variable_short, -variable_importance, median),
      as.numeric(factor(class))
    ),
    y = variable_importance
  )
) +
  # Outliers are hidden because the raw points are drawn on top anyway.
  geom_boxplot(aes(colour = class), outlier.colour = NA) +
  geom_point(aes(group = class), position = position_dodge(width = 0.75)) +
  labs(x = "variable_short")

使用此解决方案,您可以在不使用 dplyr 管道的情况下嵌套调用两次 reorder()(双重排序)

# Order the x axis in two passes, without dplyr:
#   inner reorder(): levels sorted by descending mean importance
#   outer reorder(): levels re-sorted by class (as an integer code); levels of
#     the same class tie on that code and keep the inner ordering
# The ordering expression lives inside aes(), so the axis is labelled
# explicitly; otherwise the axis title is the whole deparsed reorder() call.
ggplot(
  df,
  aes(
    x = reorder(
      reorder(variable_short, -variable_importance, mean),
      as.numeric(factor(class))
    ),
    y = variable_importance
  )
) +
  # Outliers are hidden because the raw points are drawn on top anyway.
  geom_boxplot(aes(colour = class), outlier.colour = NA) +
  geom_point(aes(group = class), position = position_dodge(width = 0.75)) +
  labs(x = "variable_short")
使用中位数(median)可能在视觉上更吸引人,因为这样箱线图的中线会依次排列

# Same two-pass ordering as the mean-based version, but the inner reorder()
# ranks levels by descending median, which matches the line drawn inside each
# box. The outer reorder() then groups levels by class, keeping the inner
# ordering within a class.
# The ordering expression lives inside aes(), so the axis is labelled
# explicitly; otherwise the axis title is the whole deparsed reorder() call.
ggplot(
  df,
  aes(
    x = reorder(
      reorder(variable_short, -variable_importance, median),
      as.numeric(factor(class))
    ),
    y = variable_importance
  )
) +
  # Outliers are hidden because the raw points are drawn on top anyway.
  geom_boxplot(aes(colour = class), outlier.colour = NA) +
  geom_point(aes(group = class), position = position_dodge(width = 0.75)) +
  labs(x = "variable_short")
我们可以使用 reorder() 按 y 轴变量 variable_importance 的负平均值对 x 轴因子 variable_short 排序,并使用分面(facet)把相同颜色的类别放在一起。这样还会添加清晰的分面标签,因此您可以去掉图例:

# Turn variable_short into a factor whose levels run from highest to lowest
# mean importance (reorder() averages the negated scores per level).
df$variable_short <- with(df, reorder(variable_short, -variable_importance))

# One facet column per class keeps each class's variables side by side;
# free x scales and spacing let every panel show only its own variables.
ggplot(
  data = df,
  mapping = aes(x = variable_short, y = variable_importance)
) +
  geom_boxplot(aes(colour = class), outlier.colour = NA) +
  geom_point(aes(group = class), position = position_dodge(width = 0.75)) +
  facet_grid(. ~ class, scales = "free_x", space = "free_x")
如果您不想使用分面,我们可以先计算出正确的顺序,再通过 factor() 的 levels 参数进行设置:

library(dplyr)

# Build the axis order explicitly: rows sorted by class and, within each
# class, by descending mean importance of the variable.
var_order <- df %>%
  group_by(class, variable_short) %>%
  # .groups = "drop" (dplyr >= 1.0.0) returns an ungrouped result and avoids
  # the "summarise() has grouped output by ..." message.
  summarize(var_mean = mean(variable_importance), .groups = "drop") %>%
  arrange(class, desc(var_mean)) %>%
  pull(variable_short) %>%
  as.character() %>%
  # factor() rejects duplicated levels; if a variable ever shows up under more
  # than one class, keep its first (highest-ranked) position.
  unique()

df$variable_short <- factor(df$variable_short, levels = var_order)

ggplot(df, aes(x = variable_short, y = variable_importance)) +
  # Outliers are hidden because the raw points are drawn on top anyway.
  geom_boxplot(aes(colour = class), outlier.colour = NA) +
  geom_point(position = position_dodge(width = 0.75), aes(group = class))
我们可以使用 reorder() 按 y 轴变量 variable_importance 的负平均值对 x 轴因子 variable_short 排序,并使用分面(facet)把相同颜色的类别放在一起。这样还会添加清晰的分面标签,因此您可以去掉图例:

# Turn variable_short into a factor whose levels run from highest to lowest
# mean importance (reorder() averages the negated scores per level).
df$variable_short <- with(df, reorder(variable_short, -variable_importance))

# One facet column per class keeps each class's variables side by side;
# free x scales and spacing let every panel show only its own variables.
ggplot(
  data = df,
  mapping = aes(x = variable_short, y = variable_importance)
) +
  geom_boxplot(aes(colour = class), outlier.colour = NA) +
  geom_point(aes(group = class), position = position_dodge(width = 0.75)) +
  facet_grid(. ~ class, scales = "free_x", space = "free_x")
如果您不想使用分面,我们可以先计算出正确的顺序,再通过 factor() 的 levels 参数进行设置:

library(dplyr)

# Build the axis order explicitly: rows sorted by class and, within each
# class, by descending mean importance of the variable.
var_order <- df %>%
  group_by(class, variable_short) %>%
  # .groups = "drop" (dplyr >= 1.0.0) returns an ungrouped result and avoids
  # the "summarise() has grouped output by ..." message.
  summarize(var_mean = mean(variable_importance), .groups = "drop") %>%
  arrange(class, desc(var_mean)) %>%
  pull(variable_short) %>%
  as.character() %>%
  # factor() rejects duplicated levels; if a variable ever shows up under more
  # than one class, keep its first (highest-ranked) position.
  unique()

df$variable_short <- factor(df$variable_short, levels = var_order)

ggplot(df, aes(x = variable_short, y = variable_importance)) +
  # Outliers are hidden because the raw points are drawn on top anyway.
  geom_boxplot(aes(colour = class), outlier.colour = NA) +
  geom_point(position = position_dodge(width = 0.75), aes(group = class))

把蓝色、红色和绿色各自归在一起很合理,也很简单。您在意每个颜色组内部的顺序吗?是某个特定的顺序?字母顺序?按平均值降序,还是按最大值升序……——我想了解如何在组内按平均值降序排列。谢谢!——另请参见关于这个问题的常见问题解答(FAQ),其中有更多答案以及适用于各种情况的更多方法。——@GregorThomas,谢谢你的链接。这也是我分享的链接之一。在那个链接中,我没有看到针对此场景的解决方案,但对于您所说的类似情况,它有几个很好的答案。——使用 factor(..., levels = my_order),您可以设置因子水平的顺序,从而按照您想要手写或计算出的任何顺序来设置坐标轴的顺序——这是常见问题解答中前 3 个答案以及我的答案中的第二个方案以不同方式使用的通用解决方案。不过,那里确实没有像您这样带分组的具体例子。把蓝色、红色和绿色各自归在一起很合理,也很简单。您在意每个颜色组内部的顺序吗?是某个特定的顺序?字母顺序?按平均值降序,还是按最大值升序……——我想了解如何在组内按平均值降序排列。谢谢!——另请参见关于这个问题的常见问题解答(FAQ),其中有更多答案以及适用于各种情况的更多方法。——@GregorThomas,谢谢你的链接。这也是我分享的链接之一。在那个链接中,我没有看到针对此场景的解决方案,但对于您所说的类似情况,它有几个很好的答案。——使用 factor(..., levels = my_order),您可以设置因子水平的顺序,从而按照您想要手写或计算出的任何顺序来设置坐标轴的顺序——这是常见问题解答中前 3 个答案以及我的答案中的第二个方案以不同方式使用的通用解决方案。不过是的,那里确实没有像您这样带分组的具体例子。