R循环到新数据帧摘要加权
我有一个很高的数据帧:R循环到新数据帧摘要加权,r,weighted,psych,R,Weighted,Psych,我有一个很高的数据帧: data = data.frame("id"=c(1,2,3,4,5,6,7,8,9,10), "group"=c(1,1,2,1,2,2,2,2,1,2), "type"=c(1,1,2,3,2,2,3,3,3,1), "score1"=c(sample(1:4,10,r=T)), "score2"=c(sample(1:4,1
# Example data in wide format: one row per id, with five score columns
# (score1..score5) and five matching weight columns (weight1..weight5).
# The scores are drawn at random, so aggregated totals differ between runs
# unless a seed is set beforehand.
data <- data.frame(
  id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  group = c(1, 1, 2, 1, 2, 2, 2, 2, 1, 2),
  type = c(1, 1, 2, 3, 2, 2, 3, 3, 3, 1),
  # Spell out `replace = TRUE`: the original `r = T` relied on partial
  # argument matching and on `T`, which can be reassigned.
  score1 = sample(1:4, 10, replace = TRUE),
  score2 = sample(1:4, 10, replace = TRUE),
  score3 = sample(1:4, 10, replace = TRUE),
  score4 = sample(1:4, 10, replace = TRUE),
  score5 = sample(1:4, 10, replace = TRUE),
  weight1 = c(173, 109, 136, 189, 186, 146, 173, 102, 178, 174),
  weight2 = c(147, 187, 125, 126, 120, 165, 142, 129, 144, 197),
  weight3 = c(103, 192, 102, 159, 128, 179, 195, 193, 135, 145),
  weight4 = c(114, 182, 199, 101, 111, 116, 198, 123, 119, 181),
  weight5 = c(159, 125, 104, 171, 166, 154, 197, 124, 180, 154)
)
library(reshape2)
library(plyr)
# Reshape from wide to long: each scoreN/weightN pair becomes one row with
# the columns `score` and `weight`; `count` records which pair (1-5) the row
# came from.
data1 <- reshape(
  data,
  direction = "long",
  varying = list(paste0("score", 1:5), paste0("weight", 1:5)),
  v.names = c("score", "weight"),
  timevar = "count",
  times = 1:5,
  idvar = "id"
)
# Sort so that all rows belonging to one id are adjacent.
data1 <- data1[order(data1[["id"]]), ]
我只是不知道如何计算加权计数:它应该将权重应用于分数变量,从而在“加权计数”列中给出按分数、组和类型聚合的加权计数。
一个选项是使用 data.table 中的 melt()——它可以接受多个 measure 模式(patterns)——然后按 group 和 type 分组,用 count() 获取加权计数:
library(data.table)
library(dplyr)
# Melt the score and weight columns to long format in one step (setDT()
# converts `data` to a data.table by reference), then sum the weights for
# every group/type/score combination.
# Passing all three keys to count() gives the same rows as
# group_by(group, type) followed by count(score, ...), but returns an
# ungrouped result; `name` labels the total as the requested weighted count.
melt(setDT(data), measure = patterns("^score", "^weight"),
     value.name = c("score", "weight")) %>%
  count(group, type, score, wt = weight, name = "weightedCOUNT")
如果我们需要一套完整的组合
library(tidyr)
# Same weighted count, expanded to the full set of group/type/score
# combinations.
# The aggregation step is required before complete(): without it nothing is
# summed and there is no count column for `fill` to act on.
melt(setDT(data), measure = patterns("^score", "^weight"),
     value.name = c("score", "weight")) %>%
  count(group, type, score, wt = weight, name = "weightedCOUNT") %>%
  # Combinations that do not occur in the data get a weighted count of 0.
  complete(group, type, score, fill = list(weightedCOUNT = 0))
如果我理解正确,
weightedCOUNT
是按得分
、分组
和类型
分组的权重之和
为了完整起见,我想展示在纯base R和纯data.table
syntax中实现时的外观
Base R
OP就快到了。他已经将多值变量的数据从宽格式改为长格式。只缺少最后的聚合步骤:
# Wide -> long: stack the five score columns into `score` and the five
# weight columns into `weight`, keeping the pair index in `count`.
data1 <- reshape(
  data,
  idvar = "id",
  timevar = "count",
  times = 1:5,
  varying = list(paste0("score", 1:5), paste0("weight", 1:5)),
  v.names = c("score", "weight"),
  direction = "long"
)
# Final aggregation step: total weight per score/group/type combination.
result <- aggregate(weight ~ score + group + type, data = data1, FUN = sum)
result
结果 result 可以按如下方式重新排序:
# Show the aggregated rows sorted by score, then group, then type.
result[with(result, order(score, group, type)), ]
data.table
如上所示,data.table 包中的 melt() 可以与 dplyr 组合使用。或者,我们也可以使用 data.table 语法进行聚合:
library(data.table)
# The same names serve as the measure patterns and as the names of the
# melted value columns.
cols <- c("score", "weight")
# Melt both column families at once, then sum the weights per
# score/group/type; keyby groups and sorts the output in one step.
melt(setDT(data), measure.vars = patterns(cols), value.name = cols)[
  , list(weightedCOUNT = sum(weight)), keyby = c("score", "group", "type")]
keyby
参数用于一步对输出进行分组和排序
在 data.table 语法中,也可以使用交叉连接函数 CJ() 补全缺失的分组变量组合:
你能展示一下加权计数的计算方法吗?我不知道怎么做,但我认为可以用以下代码来完成:count(data1,score,wt=weight)
# Show the aggregated rows sorted by score, then group, then type.
result[with(result, order(score, group, type)), ]
score group type weight
1 1 1 1 479
12 1 1 3 189
4 1 2 1 378
8 1 2 2 535
16 1 2 3 596
13 2 1 3 351
5 2 2 1 154
9 2 2 2 855
17 2 2 3 265
2 3 1 1 558
14 3 1 3 600
6 3 2 1 174
10 3 2 2 248
18 3 2 3 193
3 4 1 1 454
15 4 1 3 362
7 4 2 1 145
11 4 2 2 499
19 4 2 3 522
library(data.table)
# The same names serve as the measure patterns and as the names of the
# melted value columns.
cols <- c("score", "weight")
# Melt both column families at once, then sum the weights per
# score/group/type; keyby groups and sorts the output in one step.
melt(setDT(data), measure.vars = patterns(cols), value.name = cols)[
  , list(weightedCOUNT = sum(weight)), keyby = c("score", "group", "type")]
score group type weightedCOUNT
1: 1 1 1 479
2: 1 1 3 189
3: 1 2 1 378
4: 1 2 2 535
5: 1 2 3 596
6: 2 1 3 351
7: 2 2 1 154
8: 2 2 2 855
9: 2 2 3 265
10: 3 1 1 558
11: 3 1 3 600
12: 3 2 1 174
13: 3 2 2 248
14: 3 2 3 193
15: 4 1 1 454
16: 4 1 3 362
17: 4 2 1 145
18: 4 2 2 499
19: 4 2 3 522
# Aggregate the weights per score/group/type, then join the totals onto the
# cross join of all unique score, group and type values so that combinations
# absent from the data show up as rows, and finally set their NA totals to 0.
# The trailing [] prints the result after the by-reference update.
melt(setDT(data), measure.vars = patterns(cols), value.name = cols)[
  , list(weightedCOUNT = sum(weight)), keyby = c("score", "group", "type")][
  CJ(score, group, type, unique = TRUE), on = c("score", "group", "type")][
  is.na(weightedCOUNT), weightedCOUNT := 0][]
score group type weightedCOUNT
1: 1 1 1 479
2: 1 1 2 0
3: 1 1 3 189
4: 1 2 1 378
5: 1 2 2 535
6: 1 2 3 596
7: 2 1 1 0
8: 2 1 2 0
9: 2 1 3 351
10: 2 2 1 154
11: 2 2 2 855
12: 2 2 3 265
13: 3 1 1 558
14: 3 1 2 0
15: 3 1 3 600
16: 3 2 1 174
17: 3 2 2 248
18: 3 2 3 193
19: 4 1 1 454
20: 4 1 2 0
21: 4 1 3 362
22: 4 2 1 145
23: 4 2 2 499
24: 4 2 3 522
score group type weightedCOUNT