R 手动计算分类评级计数数据的差异
我试图从分类评级计数数据中手动计算方差(和平均值)R 手动计算分类评级计数数据的差异,r,R,我试图从分类评级计数数据中手动计算方差(和平均值) Item <- c("A", "B", "C", "D") cat1 <- c(4,12,17,NA) cat2 <- c(NA,10,20,15) cat3 <- c(17,5,12,6) cat4 <- c(10,12,17,NA) cat5 <- c(3,21,NA,16) cat6 <- c(2,14,12,20) cat7 <- c(7,NA,18,23) Data <- dat
Item <- c("A", "B", "C", "D")
cat1 <- c(4,12,17,NA)
cat2 <- c(NA,10,20,15)
cat3 <- c(17,5,12,6)
cat4 <- c(10,12,17,NA)
cat5 <- c(3,21,NA,16)
cat6 <- c(2,14,12,20)
cat7 <- c(7,NA,18,23)
Data <- data.frame(Item=Item, Never=cat1,Rarely=cat2,Occasionally=cat3, Sometimes=cat4,Frequently=cat5,Usually=cat6,Always=cat7,stringsAsFactors=FALSE)
Data
Item Never Rarely Occasionally Sometimes Frequently Usually Always
1 A 4 NA 17 10 3 2 7
2 B 12 10 5 12 21 14 NA
3 C 17 20 12 17 NA 12 18
4 D NA 15 6 NA 16 20 23
而
值
是每个数值评分
按项目
交叉点对应的单元格。我建议您在应用计算之前尝试重新调整数据集,因为这样会更容易
library(dplyr)
library(tidyr)
Item <- c("A", "B", "C", "D")
cat1 <- c(4,12,17,NA)
cat2 <- c(NA,10,20,15)
cat3 <- c(17,5,12,6)
cat4 <- c(10,12,17,NA)
cat5 <- c(3,21,NA,16)
cat6 <- c(2,14,12,20)
cat7 <- c(7,NA,18,23)
Data <- data.frame(Item=Item, Never=cat1,Rarely=cat2,Occasionally=cat3, Sometimes=cat4,Frequently=cat5,Usually=cat6,Always=cat7,stringsAsFactors=FALSE)
Data %>%
gather(category, value, -Item) %>% # reshape dataset
mutate(Rating = recode(category, "Never"=1,"Rarely" = 2,"Occasionally" = 3,
"Sometimes" = 4,"Frequently" = 5,
"Usually" = 6,"Always" = 7)) %>% # assign rating
group_by(Item) %>% # for each item
mutate(Avg = sum(Rating*value, na.rm=T) / sum(value, na.rm=T), # calculate Avg
variance = sum(abs(Rating - Avg)*value, na.rm=T) / sum(value, na.rm=T)) %>% # calculate Variance using the Avg
ungroup() %>% # forget the grouping
select(-Rating) %>% # no need the rating any more
spread(category, value) %>% # reshape back to original form
select_(.dots = c(names(Data), "Avg", "variance")) # get columns in the desired order
# # A tibble: 4 x 10
# Item Never Rarely Occasionally Sometimes Frequently Usually Always Avg variance
# * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 A 4 NA 17 10 3 2 7 3.976744 1.326122
# 2 B 12 10 5 12 21 14 NA 3.837838 1.530314
# 3 C 17 20 12 17 NA 12 18 3.739583 1.879991
# 4 D NA 15 6 NA 16 20 23 5.112500 1.529062
库(dplyr)
图书馆(tidyr)
项目I在上面得到一个错误:重新编码时出错(类别,从不=1,很少=2,偶尔=3,有时=4,:未使用的参数(从不=1,很少=2,偶尔=3,有时=4,经常=5,通常=6,总是=7)
我通过显式调用dplyr recode和select函数修复了未使用的参数错误。但是,我得到了所有四项的平均值和方差值。经过一些搜索,我尝试分离plyr-问题解决了!谢谢!我最初的方差计算错误。使用接受的答案,它应该是mutate(平均频率分数=总和(评级*值,na.rm=T)/总和(值,na.rm=T),#计算平均频率分数=总和(((评级-平均频率分数)^2)*值,na.rm=T)/总和(值,na.rm=T))
Data$Rating_var <- rowSums((Numeric_Rating - Avg_rating)*Value,na.rm=TRUE)/rowSums(Data[,2:8],na.rm=TRUE))
Never = 1
Rarely = 2
Occasionally = 3
Sometimes = 4
Frequently = 5
Usually = 6
Always = 7
library(dplyr)
library(tidyr)
Item <- c("A", "B", "C", "D")
cat1 <- c(4,12,17,NA)
cat2 <- c(NA,10,20,15)
cat3 <- c(17,5,12,6)
cat4 <- c(10,12,17,NA)
cat5 <- c(3,21,NA,16)
cat6 <- c(2,14,12,20)
cat7 <- c(7,NA,18,23)
Data <- data.frame(Item=Item, Never=cat1,Rarely=cat2,Occasionally=cat3, Sometimes=cat4,Frequently=cat5,Usually=cat6,Always=cat7,stringsAsFactors=FALSE)
Data %>%
gather(category, value, -Item) %>% # reshape dataset
mutate(Rating = recode(category, "Never"=1,"Rarely" = 2,"Occasionally" = 3,
"Sometimes" = 4,"Frequently" = 5,
"Usually" = 6,"Always" = 7)) %>% # assign rating
group_by(Item) %>% # for each item
mutate(Avg = sum(Rating*value, na.rm=T) / sum(value, na.rm=T), # calculate Avg
variance = sum(abs(Rating - Avg)*value, na.rm=T) / sum(value, na.rm=T)) %>% # calculate Variance using the Avg
ungroup() %>% # forget the grouping
select(-Rating) %>% # no need the rating any more
spread(category, value) %>% # reshape back to original form
select_(.dots = c(names(Data), "Avg", "variance")) # get columns in the desired order
# # A tibble: 4 x 10
# Item Never Rarely Occasionally Sometimes Frequently Usually Always Avg variance
# * <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 A 4 NA 17 10 3 2 7 3.976744 1.326122
# 2 B 12 10 5 12 21 14 NA 3.837838 1.530314
# 3 C 17 20 12 17 NA 12 18 3.739583 1.879991
# 4 D NA 15 6 NA 16 20 23 5.112500 1.529062