R:在 facet_wrap 中绘制平均线
我有以下数据集(标签:r、ggplot2):
# Example data set (dput() output): one row per cluster (6 rows). The first 14
# columns are per-feature values, followed by `count` (column 15) and the
# `cluster` id (column 16). Bound to `nbhpp` because the reshaping code below
# reads it under that name.
nbhpp <- structure(list(Geschaeft = c(0.0961028525512254, 0.0753516756309475,
0, 0.0722803347280335, 0, 0.000877706260971328), Gaststaette = c(0.0981116914423463,
0.0789718659495242, 0.0336538461538462, 0.0905857740585774, 0,
0.00175541252194266), Bank = c(0.100843712334271, 0.0717832023169218,
0.00480769230769231, 0.025, 0.00571428571428572, 0.00965476887068461
), Hausarzt = c(0.0633989554037766, 0.0589573851882499, 0.0288461538461538,
0.0217573221757322, 0.00685714285714286, 0.0128730251609128),
Einr..F..Aeltere = c(0.0337484933708317, 0.0550268928423666,
0.00480769230769231, 0, 0.00114285714285714, 0.000292568753657109
), Park = c(0.0738449176376055, 0.0726623913942904, 0.0625,
0.0846234309623431, 0.00228571428571429, 0.112053832650673
), Sportstaette = c(0.0449979911611089, 0.0612846503930492,
0.00480769230769231, 0.0619246861924686, 0.00114285714285714,
0), OEPNV = c(0.10847730012053, 0.089056681836988, 0.264423076923077,
0.135669456066946, 0, 0.185488589818607), Mangel.an.Gruenflaechen = c(0.0867818400964243,
0.071369466280513, 0.144230769230769, 0.117259414225941,
0.260571428571429, 0.186951433586893), Kriminalitaet = c(0.108316593009241,
0.083678113363674, 0.389423076923077, 0.139330543933054,
0.334857142857143, 0.216500877706261), Auslaender = c(0.00715146645239052,
0.0212039718659495, 0.0480769230769231, 0.0550209205020921,
0.0114285714285714, 0), Umweltbelastung = c(0.108879067898755,
0.0846607364501448, 0, 0.143828451882845, 0.376, 0.228203627852545
), Einr..f..Kinder = c(0.0693451185214946, 0.0825403392635499,
0.0144230769230769, 0.0527196652719665, 0, 0.0444704505558806
), Einr..f..Jugendliche = c(0, 0.0934526272238312, 0, 0,
0, 0.000877706260971328), count = c(1466, 1821, 81, 1491,
330, 793), cluster = c(1, 2, 3, 4, 5, 6)), .Names = c("Geschaeft",
"Gaststaette", "Bank", "Hausarzt", "Einr..F..Aeltere", "Park",
"Sportstaette", "OEPNV", "Mangel.an.Gruenflaechen", "Kriminalitaet",
"Auslaender", "Umweltbelastung", "Einr..f..Kinder", "Einr..f..Jugendliche",
"count", "cluster"), row.names = c(NA, -6L), class = "data.frame")
我按如下方式对数据进行整理:
# Drop column 15 (`count`) and reshape to long format: one row per
# (cluster, variable) pair with the measurement in `value`.
mdf <- melt(nbhpp[, -15], id.vars = "cluster")
# Re-level `variable` by its mean `value` (lowest mean first) ...
mdf$variable <- with(mdf, reorder(variable, value, mean))
# ... and keep a copy of the cluster id in a column named `y`.
mdf$y <- mdf$cluster
如果我理解正确,transform 中的 reorder 会按平均值对变量进行排序。但如何将这些平均值作为灰线添加到每个分面图中?
谢谢你的帮助
更新: 请澄清: 如果我看一下reorder语句的输出
# Print the reordered factor; the printed result carries a "scores" attribute
# holding the mean of `value` for each level of `variable`.
with(mdf, reorder(variable, value, mean))
然后我得到以下属性:
attr(,"scores")
Einr..f..Jugendliche Einr..F..Aeltere Auslaender Sportstaette
0.01572172 0.01583642 0.02381364 0.02902631
Hausarzt Bank Geschaeft Einr..f..Kinder
0.03211500 0.03630061 0.04076876 0.04391644
Gaststaette Park OEPNV Mangel.an.Gruenflaechen
0.05051310 0.06799505 0.13051918 0.14452739
Umweltbelastung Kriminalitaet
0.15692865 0.21201772
在绘图中从左(最低)到右(最高)排序。
问题是,如何用这些属性画一条线?
创建一个包含每组平均值的数据框即可。在 R 中有很多种方法可以做到这一点,例如:
# Mean of `value` within each distinct `y` (plyr split-apply-combine):
# the result has one row per group with columns `y` and `mean`.
means <- ddply(
  .data = mdf,
  .variables = .(y),
  .fun = summarise,
  mean = mean(value)
)
编辑答案
要添加一条表示聚类平均值的线,需要构造一个包含这些数据的 data.frame。您可以从 mdf 中提取这些值:
# Data for the grey "average" line: the per-variable means that reorder()
# stored in the "scores" attribute of mdf$variable, repeated once per cluster
# so that every facet panel gets its own copy of the mean profile.
meanscores <- attr(mdf$variable, "scores", exact = TRUE)
# Take the cluster ids and the number of variables from the data instead of
# hard-coding 6 clusters x 14 variables.
clusters <- sort(unique(mdf$cluster))
meandf <- data.frame(
  # Reuse the level order of mdf$variable so the x axis keeps its ordering.
  variable = factor(
    rep(names(meanscores), times = length(clusters)),
    levels = levels(mdf$variable)
  ),
  value = rep(unname(meanscores), times = length(clusters)),
  cluster = rep(clusters, each = length(meanscores))
)
原始答案:我最初的理解是,你想要一条表示总体平均值的水平线。只需在绘图中添加一个 geom_hline 图层,并将 yintercept 设为 value 的总体平均值(mean(value)):
澄清一下:你想在每个分面(facet)中都画一条水平线,表示该分面的平均值吗?——我不是这个意思……我已经在上面澄清了这一点……不过还是要谢谢你!——这就是我要找的。
# Add one horizontal line per row of `means`, at the height given by its
# `mean` column.
# NOTE(review): `p` is not defined in the visible snippets; presumably it is
# the ggplot object built from `mdf`. Confirm before running.
p + geom_hline(data = means, mapping = aes(yintercept = mean))
# Data for the grey "average" line: the per-variable means that reorder()
# stored in the "scores" attribute of mdf$variable, repeated once per cluster
# so that every facet panel gets its own copy of the mean profile.
meanscores <- attr(mdf$variable, "scores", exact = TRUE)
# Take the cluster ids and the number of variables from the data instead of
# hard-coding 6 clusters x 14 variables.
clusters <- sort(unique(mdf$cluster))
meandf <- data.frame(
  # Reuse the level order of mdf$variable so the x axis keeps its ordering.
  variable = factor(
    rep(names(meanscores), times = length(clusters)),
    levels = levels(mdf$variable)
  ),
  value = rep(unname(meanscores), times = length(clusters)),
  cluster = rep(clusters, each = length(meanscores))
)
# Faceted line plot: one panel per cluster showing `value` for each variable,
# with the mean profile from `meandf` overlaid in grey in every panel.
ggplot(mdf, aes(x = variable, y = value, group = cluster, colour = factor(cluster))) +
  geom_line() +
  # `formatter =` no longer exists on ggplot2 scales; the label formatter is
  # passed through `labels =` (scales is a hard dependency of ggplot2).
  scale_y_continuous("Anteile", labels = scales::percent) +
  scale_colour_hue(name = "Cluster") +
  xlab("Infrastrukturmerkmal") +
  theme_bw() +
  # opts()/theme_text() were replaced by theme()/element_text().
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  facet_wrap(~cluster, ncol = 3) +
  # The constant colour overrides the per-cluster colour mapping for this layer.
  geom_line(data = meandf, aes(x = variable, y = value), colour = "grey50")
# Faceted line plot: one panel per cluster showing `value` for each variable,
# with a grey horizontal line at the overall mean of `value` in every panel.
ggplot(mdf, aes(x = variable, y = value, group = cluster, colour = factor(cluster))) +
  geom_line() +
  # `formatter =` no longer exists on ggplot2 scales; the label formatter is
  # passed through `labels =` (scales is a hard dependency of ggplot2).
  scale_y_continuous("Anteile", labels = scales::percent) +
  scale_colour_hue(name = "Cluster") +
  xlab("Infrastrukturmerkmal") +
  theme_bw() +
  # opts()/theme_text() were replaced by theme()/element_text().
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  facet_wrap(~cluster, ncol = 3) +
  # A constant yintercept draws a single line per panel; mapping
  # aes(yintercept = mean(value)) would draw one overlapping line per data row.
  geom_hline(yintercept = mean(mdf$value), colour = "grey50")