在R中重塑数据
我有一个数据集,显示在文章底部。数据有四列:SIC、AT95Group、AT95Mean 和 AT95Med。AT95Group 列有四个取值:“00”、“01”、“11”和“10”。目前,每个 SIC 有四行,分别对应 AT95Group 的每个取值。我想重塑数据框,使每个 SIC 只有一行。之前每个 (SIC, AT95Group) 组合有 mean 和 med 两列,现在我们希望得到 4*2 = 8 列(4 对应组“00”、“11”、“01”、“10”,2 对应“mean”和“med”)。这八列的名称类似于“00Mean”、“11Mean”、“00Med”、“11Med”等,每个 SIC 在各列中都有相应的值。我觉得这非常难做到。有什么建议吗?谢谢。(标签:r)
> dput(head(pp,20))
structure(list(SIC = c(1L, 1L, 1L, 10L, 10L, 10L, 10L, 12L, 12L,
12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L), AT95Group = c("11",
"10", "00", "11", "01", "00", "10", "01", "11", "10", "00", "11",
"01", "00", "10", "11", "01", "10", "00", "01"), AT95Med = c(0.0691039117115276,
0.0608649722972575, 0.0609974198491522, 0.215571816296268, 0.305308985848382,
0.351312558091798, 0.352704719896703, 0.0459887720804718, 0.0304466181779069,
0.0513875431555943, 0.0541431932578377, 0.0650920855876547, 0.143724642017362,
0.156092793582293, 0.0976059673595903, 0.0116620184564053, 0.0188895210677074,
0.0356836223212195, 0.0513040852859517, 0.0982448708035204),
AT95Mean = c(0.0691039117115276, 0.0608649722972575, 0.0609974198491522,
0.215571816296268, 0.305308985848382, 0.351312558091798,
0.352704719896703, 0.0459887720804718, 0.0304466181779069,
0.0513875431555943, 0.0541431932578377, 0.0650920855876547,
0.143724642017362, 0.156092793582293, 0.0976059673595903,
0.0116620184564053, 0.0188895210677074, 0.0356836223212195,
0.0513040852859517, 0.0982448708035204)), .Names = c("SIC",
"AT95Group", "AT95Med", "AT95Mean"), row.names = c(241L, 236L,
27L, 1126L, 1035L, 1030L, 664L, 1269L, 1259L, 1245L, 1244L, 3919L,
4722L, 3329L, 3222L, 4886L, 4889L, 4951L, 4860L, 5108L), class = "data.frame")
我用如下代码做过尝试,但没有成功,不知道该如何继续:
# Reduce dacc1 to the four columns of interest and drop repeated rows.
pp <- unique(
  dacc1[, c("SIC", "AT95Group", "AT95Med", "AT95Mean")]
)
# Distinct SIC codes found in the de-duplicated table.
xsic <- unique(pp[, "SIC"])
# Two parallel vectors: the SIC codes and a constant "AT95" label per code.
xlist <- list(xsic, rep("AT95", times = length(xsic)))
pp
我希望这个解决方案不会太晦涩难懂:
# Build a wide table `d` with one row per SIC and, for every AT95Group code,
# a "<group>AT95Med" and a "<group>AT95Mean" column.
# Reads `pp`, which must provide the columns SIC, AT95Group, AT95Med, AT95Mean.
xsic <- unique(pp[, "SIC"])
AT <- c("00", "01", "10", "11")
d <- data.frame(xsic = xsic)
# seq_along() keeps the loop tied to the length of AT instead of a literal 4.
for (i in seq_along(AT)) {
  subgroup <- pp[pp$AT95Group == AT[i], ]
  # Position of each SIC inside this group's rows; NA when the SIC has no row
  # for the group, which yields NA in both new columns. Computed once and
  # reused for Med and Mean.
  idx <- match(xsic, subgroup$SIC)
  d[[paste0(AT[i], "AT95Med")]] <- subgroup$AT95Med[idx]
  d[[paste0(AT[i], "AT95Mean")]] <- subgroup$AT95Mean[idx]
}
或者您可以使用 `reshape` 软件包:
顺便说一句,base R 中也有一个 `reshape` 函数(好吧,严格来说它在 `stats` 包中):
请看我运行您提到的命令后得到的结果。我已经在编辑中发布了我的结果。我出错了。嗯。。。不确定。有一件事是你们在pp上运行melt,而不是pp1——这是有意的还是你们有可能重塑pp?因为我看到melt不包括默认组中的SIC,所以可以替换melt吗
# Reshape `pp` from long to wide: one row per SIC, plus a "<group>AT95Med" and
# a "<group>AT95Mean" column for each AT95Group code listed in AT.
# Reads `pp`, which must provide the columns SIC, AT95Group, AT95Med, AT95Mean.
xsic <- unique(pp[, "SIC"])
AT <- c("00", "01", "10", "11")
d <- data.frame(xsic = xsic)
# Iterate over the positions of AT so the loop cannot drift from its length.
for (i in seq_along(AT)) {
  subgroup <- pp[pp$AT95Group == AT[i], ]
  # Look up each SIC in this group's rows once; SICs without a row in the
  # group map to NA and therefore get NA in both columns.
  idx <- match(xsic, subgroup$SIC)
  d[[paste0(AT[i], "AT95Med")]] <- subgroup$AT95Med[idx]
  d[[paste0(AT[i], "AT95Mean")]] <- subgroup$AT95Mean[idx]
}
xsic 00AT95Med 00AT95Mean 01AT95Med 01AT95Mean 10AT95Med 10AT95Mean 11AT95Med 11AT95Mean
1 0.06099742 0.06099742 NA NA 0.06086497 0.06086497 0.06910391 0.06910391
10 0.35131256 0.35131256 0.30530899 0.30530899 0.35270472 0.35270472 0.21557182 0.21557182
12 0.05414319 0.05414319 0.04598877 0.04598877 0.05138754 0.05138754 0.03044662 0.03044662
13 0.15609279 0.15609279 0.14372464 0.14372464 0.09760597 0.09760597 0.06509209 0.06509209
14 0.05130409 0.05130409 0.01888952 0.01888952 0.03568362 0.03568362 0.01166202 0.01166202
15 NA NA 0.09824487 0.09824487 NA NA NA NA
# One-time setup: install 'reshape' only when it is not already available.
if (!requireNamespace("reshape", quietly = TRUE)) {
  install.packages("reshape")
}
library(reshape)
pp # this is what I called your table
# Stretch the table out into variable/value pairs. The id columns are named
# explicitly: when id.vars is omitted, melt() only keeps factor/character
# columns as ids, so the integer SIC column would be melted as a measured
# variable and could no longer be used on the left-hand side of cast().
molten <- melt(pp, id.vars = c("SIC", "AT95Group"))
# Relabel each variable as "<stat> <group>", e.g. "Med 00": strip the literal
# "AT95" prefix (anchored, rather than a character class that would delete any
# 'A', 'T', '9' or '5') and append the group code. paste() already separates
# its arguments with a single space, so no extra " " argument is passed --
# that would be pasted on as a third string and leave trailing blanks in the
# resulting column names.
molten$variable <- paste(
  sub("^AT95", "", as.character(molten$variable)),
  molten$AT95Group
)
# Then use cast (see ?cast): this gives a crosstab of the SICs against the new
# variables. Only the three columns needed for the cast are passed, selected
# by name so the call does not depend on column order.
cast(molten[, c("SIC", "variable", "value")], SIC ~ variable)
SIC Mean 00 Mean 01 Mean 10 Mean 11 Med 00 Med 01 Med 10 Med 11
1 1 0.06099742 NA 0.06086497 0.06910391 0.06099742 NA 0.06086497 0.06910391
2 10 0.35131256 0.30530899 0.35270472 0.21557182 0.35131256 0.30530899 0.35270472 0.21557182
3 12 0.05414319 0.04598877 0.05138754 0.03044662 0.05414319 0.04598877 0.05138754 0.03044662
4 13 0.15609279 0.14372464 0.09760597 0.06509209 0.15609279 0.14372464 0.09760597 0.06509209
5 14 0.05130409 0.01888952 0.03568362 0.01166202 0.05130409 0.01888952 0.03568362 0.01166202
6 15 NA 0.09824487 NA NA NA 0.09824487 NA NA
# Long-to-wide with stats::reshape(): one row per SIC, and one copy of each
# value column per AT95Group code (columns are named "<value>.<group>").
stats::reshape(
  data = pp,
  idvar = "SIC",
  timevar = "AT95Group",
  v.names = c("AT95Med", "AT95Mean"),
  direction = "wide"
)
# SIC AT95Med.11 AT95Mean.11 AT95Med.10 AT95Mean.10 AT95Med.00 AT95Mean.00 AT95Med.01 AT95Mean.01
#241 1 0.06910391 0.06910391 0.06086497 0.06086497 0.06099742 0.06099742 NA NA
#1126 10 0.21557182 0.21557182 0.35270472 0.35270472 0.35131256 0.35131256 0.30530899 0.30530899
#1269 12 0.03044662 0.03044662 0.05138754 0.05138754 0.05414319 0.05414319 0.04598877 0.04598877
#3919 13 0.06509209 0.06509209 0.09760597 0.09760597 0.15609279 0.15609279 0.14372464 0.14372464
#4886 14 0.01166202 0.01166202 0.03568362 0.03568362 0.05130409 0.05130409 0.01888952 0.01888952
#5108 15 NA NA NA NA NA NA 0.09824487 0.09824487