R:手动计算分类评级计数数据的方差

R 手动计算分类评级计数数据的方差,r,R,我试图从分类评级计数数据中手动计算方差(和平均值) Item <- c("A", "B", "C", "D") cat1 <- c(4,12,17,NA) cat2 <- c(NA,10,20,15) cat3 <- c(17,5,12,6) cat4 <- c(10,12,17,NA) cat5 <- c(3,21,NA,16) cat6 <- c(2,14,12,20) cat7 <- c(7,NA,18,23) Data <- dat

我试图从分类评级计数数据中手动计算方差(和平均值)

# Row labels: one row of the rating table per item.
Item <- c("A", "B", "C", "D")

# Counts per rating category, listed in item order (A, B, C, D).
# NA marks a missing count.
cat1 <- c(4, 12, 17, NA)
cat2 <- c(NA, 10, 20, 15)
cat3 <- c(17, 5, 12, 6)
cat4 <- c(10, 12, 17, NA)
cat5 <- c(3, 21, NA, 16)
cat6 <- c(2, 14, 12, 20)
cat7 <- c(7, NA, 18, 23)

# Wide table: the item label followed by one count column per category.
Data <- data.frame(
  Item = Item,
  Never = cat1,
  Rarely = cat2,
  Occasionally = cat3,
  Sometimes = cat4,
  Frequently = cat5,
  Usually = cat6,
  Always = cat7,
  stringsAsFactors = FALSE
)

Data

  Item Never Rarely Occasionally Sometimes Frequently Usually Always
1    A     4     NA           17        10          3       2      7
2    B    12     10            5        12         21      14     NA
3    C    17     20           12        17         NA      12     18
4    D    NA     15            6        NA         16      20     23

其中 Value 是每个 Numeric_Rating(数值评分)与 Item(项目)交叉点对应的单元格,即该项目获得该评分的次数。

我建议您在进行计算之前先把数据集重塑(reshape)为长格式,这样计算会容易得多:

library(dplyr)
library(tidyr)


# Item labels (rows of the table).
Item <- c("A", "B", "C", "D")

# One vector of counts per rating category; position i belongs to Item[i].
# NA marks a missing count.
cat1 <- c(4, 12, 17, NA)    # Never
cat2 <- c(NA, 10, 20, 15)   # Rarely
cat3 <- c(17, 5, 12, 6)     # Occasionally
cat4 <- c(10, 12, 17, NA)   # Sometimes
cat5 <- c(3, 21, NA, 16)    # Frequently
cat6 <- c(2, 14, 12, 20)    # Usually
cat7 <- c(7, NA, 18, 23)    # Always

# Combine the vectors into the wide item-by-category count table.
Data <- data.frame(
  Item = Item,
  Never = cat1, Rarely = cat2, Occasionally = cat3, Sometimes = cat4,
  Frequently = cat5, Usually = cat6, Always = cat7,
  stringsAsFactors = FALSE
)


# Count-weighted mean and variance of the 1-7 rating for every item.
# Each count cell of Data says how many times an item received a category, so
# both statistics weight the category's numeric rating by that count and divide
# by the item's total count. NA cells are skipped through na.rm = TRUE.
# mutate(), recode() and select() are namespace-qualified because packages
# attached after dplyr (plyr, for instance) export functions with the same
# names and would otherwise be called instead of the dplyr versions.
Data %>%
  # reshape to long form: one row per item/category pair
  pivot_longer(-Item, names_to = "category", values_to = "value") %>%
  # attach the numeric rating that each category label stands for
  dplyr::mutate(
    Rating = dplyr::recode(
      category,
      "Never" = 1, "Rarely" = 2, "Occasionally" = 3, "Sometimes" = 4,
      "Frequently" = 5, "Usually" = 6, "Always" = 7
    )
  ) %>%
  group_by(Item) %>%
  dplyr::mutate(
    Avg = sum(Rating * value, na.rm = TRUE) / sum(value, na.rm = TRUE),
    # a variance needs the squared deviation from the mean; abs() would give
    # the mean absolute deviation instead
    variance = sum((Rating - Avg)^2 * value, na.rm = TRUE) /
      sum(value, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # the rating is no longer needed once the statistics exist
  dplyr::select(-Rating) %>%
  # reshape back to the original wide layout
  pivot_wider(names_from = category, values_from = value) %>%
  # original columns first, then the two statistics
  dplyr::select(all_of(names(Data)), Avg, variance)


# Expected result (statistics shown to 7 significant digits):
# # A tibble: 4 x 10
#    Item Never Rarely Occasionally Sometimes Frequently Usually Always      Avg variance
# * <chr> <dbl>  <dbl>        <dbl>     <dbl>      <dbl>   <dbl>  <dbl>    <dbl>    <dbl>
# 1     A     4     NA           17        10          3       2      7 3.976744 2.952948
# 2     B    12     10            5        12         21      14     NA 3.837838 3.081812
# 3     C    17     20           12        17         NA      12     18 3.739583 4.671766
# 4     D    NA     15            6        NA         16      20     23 5.112500 3.374844
library(dplyr)
library(tidyr)

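我在运行上面的代码时遇到一个错误:
Error in recode(category, Never = 1, Rarely = 2, Occasionally = 3, Sometimes = 4, : unused arguments (Never = 1, Rarely = 2, Occasionally = 3, Sometimes = 4, Frequently = 5, Usually = 6, Always = 7)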
我通过显式调用 dplyr 的 recode 和 select 函数(即 dplyr::recode、dplyr::select)修复了“unused arguments”(未使用的参数)错误。但是,所有四个项目得到的平均值和方差都相同。经过一些搜索,我尝试卸载(detach)plyr 包——问题解决了!谢谢!另外,我最初的方差计算有误。按照被采纳的答案的写法,它应该是:
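mutate(Avg_Freq_Score = sum(Rating * value, na.rm = TRUE) / sum(value, na.rm = TRUE),   # 计算平均值
       variance = sum(((Rating - Avg_Freq_Score)^2) * value, na.rm = TRUE) / sum(value, na.rm = TRUE))   # 用平均值计算方差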
# Base-R version of the calculation, done directly on the wide table.
# Numeric_Rating maps every rating category (a count column of Data) to its
# score on the 1-7 scale.
Numeric_Rating <- c(
  Never = 1, Rarely = 2, Occasionally = 3, Sometimes = 4,
  Frequently = 5, Usually = 6, Always = 7
)

# Value[i, j] is the count stored for item i in rating category j.
Value <- as.matrix(Data[, names(Numeric_Rating)])

# Total number of ratings per item; NA cells are skipped.
n_ratings <- rowSums(Value, na.rm = TRUE)

# Count-weighted mean: sweep() multiplies column j by the score of category j.
Data$Avg_rating <-
  rowSums(sweep(Value, 2, Numeric_Rating, `*`), na.rm = TRUE) / n_ratings

# Deviation[i, j] is the score of category j minus the mean of item i. The
# deviations are squared before weighting; unsquared, they sum to zero.
Deviation <- outer(
  Data$Avg_rating, Numeric_Rating,
  function(avg, rating) rating - avg
)
Data$Rating_var <- rowSums(Deviation^2 * Value, na.rm = TRUE) / n_ratings
library(dplyr)
library(tidyr)


# Labels of the rated items; they become the Item column.
Item <- c("A", "B", "C", "D")

# Counts for the seven rating categories, lowest (cat1) to highest (cat7).
# Each vector is ordered like Item; NA marks a missing count.
cat1 <- c(4, 12, 17, NA)
cat2 <- c(NA, 10, 20, 15)
cat3 <- c(17, 5, 12, 6)
cat4 <- c(10, 12, 17, NA)
cat5 <- c(3, 21, NA, 16)
cat6 <- c(2, 14, 12, 20)
cat7 <- c(7, NA, 18, 23)

# Build the wide table, giving every count vector its category name.
Data <- data.frame(
  Item         = Item,
  Never        = cat1,
  Rarely       = cat2,
  Occasionally = cat3,
  Sometimes    = cat4,
  Frequently   = cat5,
  Usually      = cat6,
  Always       = cat7,
  stringsAsFactors = FALSE
)


# Per-item mean and variance of the 1-7 rating, weighted by the counts in Data.
# A count cell tells how many times the item received that category; the
# statistics weight each category's numeric rating by its count and divide by
# the item's total count, ignoring NA cells (na.rm = TRUE).
# The dplyr verbs whose names other packages also export (mutate, recode,
# select) are written with the dplyr:: prefix so that a package attached after
# dplyr, such as plyr, cannot replace them.
Data %>%
  # long form: one row for every item/category combination
  pivot_longer(-Item, names_to = "category", values_to = "value") %>%
  # translate the category label into its numeric rating
  dplyr::mutate(
    Rating = dplyr::recode(
      category,
      "Never" = 1, "Rarely" = 2, "Occasionally" = 3, "Sometimes" = 4,
      "Frequently" = 5, "Usually" = 6, "Always" = 7
    )
  ) %>%
  group_by(Item) %>%
  dplyr::mutate(
    Avg = sum(Rating * value, na.rm = TRUE) / sum(value, na.rm = TRUE),
    # squared deviations give the variance; absolute deviations would give
    # the mean absolute deviation, which is a different statistic
    variance = sum((Rating - Avg)^2 * value, na.rm = TRUE) /
      sum(value, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # drop the helper column before reshaping back
  dplyr::select(-Rating) %>%
  # wide form again: one column per category
  pivot_wider(names_from = category, values_from = value) %>%
  # keep the original column order and append the statistics
  dplyr::select(all_of(names(Data)), Avg, variance)


# Expected result (statistics shown to 7 significant digits):
# # A tibble: 4 x 10
#    Item Never Rarely Occasionally Sometimes Frequently Usually Always      Avg variance
# * <chr> <dbl>  <dbl>        <dbl>     <dbl>      <dbl>   <dbl>  <dbl>    <dbl>    <dbl>
# 1     A     4     NA           17        10          3       2      7 3.976744 2.952948
# 2     B    12     10            5        12         21      14     NA 3.837838 3.081812
# 3     C    17     20           12        17         NA      12     18 3.739583 4.671766
# 4     D    NA     15            6        NA         16      20     23 5.112500 3.374844