Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/70.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 按百分比分组不工作占整个组的百分比_R_Dplyr - Fatal编程技术网

R 按百分比分组不工作占整个组的百分比

R 按百分比分组不工作占整个组的百分比,r,dplyr,R,Dplyr,我已经搜索了网络和堆栈,但没有运气尝试多个东西。我有一个数据集,我试图得到一个分组计算,但也有该组的百分比。下面是我得到的一个例子: structure(list(Advanced = c("Task5", "Advanced", "0", "2", "1", " 8.500000", "0.0", "3.191667", &q

我已经搜索过网络和 Stack Overflow,也尝试了多种方法,但都没有成功。我有一个数据集,想对它做分组计算,同时还要得到每个分组占其所属整组(segment)的百分比。下面是我目前得到的结果示例:

    structure(list(Advanced = c("Task5", "Advanced", "0", "2", "1", 
" 8.500000", "0.0", "3.191667", "25.00000", "4.500000", "0.6428571", 
"4.50", "0.6428571", "4.500000", "0.6428571", "5.000000", "0.7142857", 
"6.500000", "0.9285714", "2", "1.00", NA, NA), Advanced = c("Task5", 
"Advanced", "1", "4", "1", "26.500000", "0.5", "9.037500", "63.25000", 
"6.000000", "0.8571429", "4.75", "0.6785714", "5.250000", "0.7500000", 
"6.000000", "0.8571429", "5.250000", "0.7500000", "1", "0.25", 
"4.333333", "0.6190476"), Novice = c("Task5", "Novice", "0", 
"2", "1", " 6.000000", "0.0", "4.850000", "49.00000", "6.500000", 
"0.9285714", "6.00", "0.8571429", "6.000000", "0.8571429", "6.500000", 
"0.9285714", "6.500000", "0.9285714", "2", "1.00", NA, NA), Novice = c("Task5", 
"Novice", "1", "6", "1", " 7.666667", "1.0", "8.672222", "69.83333", 
"5.333333", "0.7619048", "4.00", "0.5714286", "3.833333", "0.5476190", 
"4.333333", "0.6190476", "4.166667", "0.5952381", "0", "0.00", 
"4.000000", "0.5714286")), class = "data.frame", row.names = c("Task", 
"segment", "t5_pass_fail", "N", "SuccessRate", "UniquePages", 
"Timedout_percent", "TimeonTask", "Clicks", "Useful_raw", "Useful_percent", 
"UserFriendly_raw", "UserFriendly_percent", "Learned_raw", "Learned_percent", 
"Satisfied_raw", "Satisfied_percent", "Confident_raw", "Confident_percent", 
"Experienced_Difficulty_raw", "Experienced_Difficulty_percent", 
"difficulty_level_raw", "difficulty_level_percent"))
获取上述信息的过程如下所示:

    # Question's original attempt: summarise the Task 5 columns of `cleanuxq4`
    # for every (segment, t5_pass_fail) group.
    Task5_Strict <- cleanuxq4 %>% 
  dplyr:: select(("urespid_0"),("segment"),starts_with("t")) %>%
  group_by (segment,t5_pass_fail)%>%
  summarize(
    Task = "Task5",
    # Number of rows in the current (segment, t5_pass_fail) group.
    N =n(),
    # NOTE(review): `t5_pass_fail = 1` uses `=` (assignment), not the
    # comparison `==`, so this is not a test of "passed". This is the column
    # the question is about: it does not give the share of the whole segment.
    SuccessRate = ((t5_pass_fail = 1)/sum(t5_pass_fail)),
    UniquePages = (mean(t5_unique_pageviews)),
    Timedout_percent = sum(t5_effectiveness)/N,
    # presumably converts seconds to minutes -- TODO confirm the unit
    TimeonTask = mean(t5_time_task)/60,
    Clicks = mean(t5_clicks),
    # The `*_percent` columns below divide the group mean by 7
    # (presumably a 7-point rating scale -- TODO confirm).
    Useful_raw = mean(t5_useful),
    Useful_percent = Useful_raw/7,
    UserFriendly_raw = mean(t5_user_friendly),
    UserFriendly_percent = UserFriendly_raw/7,
    Learned_raw = mean(t5_learned),
    Learned_percent = Learned_raw/7,
    Satisfied_raw = mean(t5_satisfied),
    Satisfied_percent = Satisfied_raw/7,
    Confident_raw = mean(t5_confident),
    Confident_percent = Confident_raw/7,
    Experienced_Difficulty_raw = sum(t5_exp_difficulty),
    Experienced_Difficulty_percent = Experienced_Difficulty_raw/N,
    # Only this mean drops NA values before averaging.
    difficulty_level_raw = mean(t5_difficulty_level, na.rm=TRUE),
    difficulty_level_percent = difficulty_level_raw/7
  )
# Move columns, then use the second row as the header for the pivot columns ----
# `Task` ends up first and `segment` second, followed by the remaining columns.


 Task5_Strict <- Task5_Strict %>%
      select("segment",everything())%>%
      select("Task",everything())

# Transpose so each summary column becomes a row and each group a column.
Task5_Strict_Pivot <- as.data.frame(t(Task5_Strict))
# NOTE(review): the copy with row 2 removed is stored under
# `zTask5_Strict_Pivot` (leading `z`), so `Task5_Strict_Pivot` itself still
# contains row 2 -- confirm whether the `z` prefix is intentional.
zTask5_Strict_Pivot <-Task5_Strict_Pivot[-2,]
# Use the values of row 2 (the `segment` row after the reordering above)
# as the column names.
colnames(Task5_Strict_Pivot)<-Task5_Strict_Pivot[2,]
Task5\u%
dplyr::select((“urespid_0”),(“段”),以(“t”)开头%>%
分组依据(分段,t5通过/失败)%>%
总结(
Task=“Task5”,
N=N(),
成功率=((t5通过\失败=1)/总和(t5通过\失败)),
唯一页面=(平均值(t5_唯一页面浏览量)),
Timedout\u百分比=总和(t5\u有效性)/N,
TimeonTask=平均值(t5时间任务)/60,
点击次数=平均值(t5_点击次数),
有用_原始=平均值(t5_有用),
有用百分比=有用原始值/7,
用户友好型=平均值(t5用户友好型),
用户友好型\u百分比=用户友好型\u原始/7,
已学习=平均值(t5已学习),
已学习百分比=已学习原始/7,
满意的原始=平均值(t5满意),
满意率=满意率/7,
自信度=平均值(t5自信度),
信心百分比=信心原始/7,
经验难度=总和(t5经验难度),
经验难度百分比=经验难度原始/N,
难度等级原始=平均值(t5难度等级,na.rm=真),
难度等级百分比=难度等级原始/7
)
#移动列n,然后将第二行用作轴列的标题--------
任务5_%
选择(“段”,所有内容())%>%
选择(“任务”,所有内容())

Task5_Strict_Pivot

要计算 `SuccessRate`,您可以使用 `mean(t5_pass_fail == 1) * 100`:

library(dplyr) 

# Answer's version: per-(segment, t5_pass_fail) summary of the Task 5 columns,
# with SuccessRate expressed as the percentage of rows where t5_pass_fail is 1.
Task5_Strict <- cleanuxq4 %>%
  dplyr::select("urespid_0", "segment", starts_with("t")) %>%
  group_by(segment, t5_pass_fail) %>%
  summarise(
    Task = "Task5",
    N = n(),
    # Share of rows in the group with t5_pass_fail equal to 1, scaled to 0-100.
    SuccessRate = 100 * mean(t5_pass_fail == 1),
    UniquePages = mean(t5_unique_pageviews),
    Timedout_percent = sum(t5_effectiveness) / N,
    TimeonTask = mean(t5_time_task) / 60,
    Clicks = mean(t5_clicks),
    # Rating-style columns: group mean, then the mean divided by 7.
    Useful_raw = mean(t5_useful),
    Useful_percent = Useful_raw / 7,
    UserFriendly_raw = mean(t5_user_friendly),
    UserFriendly_percent = UserFriendly_raw / 7,
    Learned_raw = mean(t5_learned),
    Learned_percent = Learned_raw / 7,
    Satisfied_raw = mean(t5_satisfied),
    Satisfied_percent = Satisfied_raw / 7,
    Confident_raw = mean(t5_confident),
    Confident_percent = Confident_raw / 7,
    Experienced_Difficulty_raw = sum(t5_exp_difficulty),
    Experienced_Difficulty_percent = Experienced_Difficulty_raw / N,
    # Missing difficulty levels are dropped before averaging.
    difficulty_level_raw = mean(t5_difficulty_level, na.rm = TRUE),
    difficulty_level_percent = difficulty_level_raw / 7
  )
库(dplyr)
任务5_%
dplyr::select((“urespid_0”),(“段”),以(“t”)开头%>%
分组依据(分段,t5通过/失败)%>%
总结(
Task=“Task5”,
N=N(),
成功率=平均值(t5通过率=1)*100,
唯一页面=(平均值(t5_唯一页面浏览量)),
Timedout\u百分比=总和(t5\u有效性)/N,
TimeonTask=平均值(t5时间任务)/60,
点击次数=平均值(t5_点击次数),
有用_原始=平均值(t5_有用),
有用百分比=有用原始值/7,
用户友好型=平均值(t5用户友好型),
用户友好型\u百分比=用户友好型\u原始/7,
已学习=平均值(t5已学习),
已学习百分比=已学习原始/7,
满意的原始=平均值(t5满意),
满意率=满意率/7,
自信度=平均值(t5自信度),
信心百分比=信心原始/7,
经验难度=总和(t5经验难度),
经验难度百分比=经验难度原始/N,
难度等级原始=平均值(t5难度等级,na.rm=真),
难度等级百分比=难度等级原始/7
)

感谢 Stefan 和 Ronak 的帮助,不过我之前试图做的事情在逻辑上行不通。我需要先在按 `t5_pass_fail` 分组之外求出每个 segment 的总数,然后在分组内用它来计算正确的百分比。

解决方案在下面代码的末尾:

    # Summarise the Task 5 columns of `cleanuxq4` for every
    # (segment, t5_pass_fail) combination, then express each combination's row
    # count as a share of its segment's total.
    Task5_Strict <- cleanuxq4 %>%
      dplyr::select("urespid_0", "segment", starts_with("t")) %>%
      group_by(segment, t5_pass_fail) %>%
      summarize(
        Task = "Task5",
        N = n(),
        UniquePages = mean(t5_unique_pageviews),
        Timedout_percent = sum(t5_effectiveness) / N,
        TimeonTask = mean(t5_time_task) / 60,
        Clicks = mean(t5_clicks),
        Useful_raw = mean(t5_useful),
        Useful_percent = Useful_raw / 7,
        UserFriendly_raw = mean(t5_user_friendly),
        UserFriendly_percent = UserFriendly_raw / 7,
        Learned_raw = mean(t5_learned),
        Learned_percent = Learned_raw / 7,
        Satisfied_raw = mean(t5_satisfied),
        Satisfied_percent = Satisfied_raw / 7,
        Confident_raw = mean(t5_confident),
        Confident_percent = Confident_raw / 7,
        Experienced_Difficulty_raw = sum(t5_exp_difficulty),
        Experienced_Difficulty_percent = Experienced_Difficulty_raw / N,
        difficulty_level_raw = mean(t5_difficulty_level, na.rm = TRUE),
        difficulty_level_percent = difficulty_level_raw / 7
      ) %>%
      # State the per-segment grouping explicitly instead of relying on the
      # grouping that summarize() happens to leave behind, so the segment total
      # stays correct if the grouping columns above are ever changed.
      group_by(segment) %>%
      mutate(
        zcount = sum(N),           # total rows in the segment (pass + fail)
        SuccessRate = N / zcount   # this row's share of its segment
      ) %>%
      # Drop the grouping so later steps do not silently operate per segment.
      ungroup()
Task5\u%
dplyr::select((“urespid_0”),(“段”),以(“t”)开头%>%
分组依据(分段,t5通过/失败)%>%
总结(
Task=“Task5”,
N=N(),
唯一页面=(平均值(t5_唯一页面浏览量)),
Timedout\u百分比=总和(t5\u有效性)/N,
TimeonTask=平均值(t5时间任务)/60,
点击次数=平均值(t5_点击次数),
有用_原始=平均值(t5_有用),
有用百分比=有用原始值/7,
用户友好型=平均值(t5用户友好型),
用户友好型\u百分比=用户友好型\u原始/7,
已学习=平均值(t5已学习),
已学习百分比=已学习原始/7,
满意的原始=平均值(t5满意),
满意率=满意率/7,
自信度=平均值(t5自信度),
信心百分比=信心原始/7,
经验难度=总和(t5经验难度),
经验难度百分比=经验难度原始/N,
难度等级原始=平均值(t5难度等级,na.rm=真),
难度等级百分比=难度等级原始/7
)

Task5_Strict

你确定要按 `t5_pass_fail` 分组吗?

@stefan 我按 `t5_pass_fail` 分组的原因是,我需要查看通过任务和未通过任务的人之间的得分差异。如果有更好的方法,我很乐意学习。理想情况下,我想要的结果看起来像(原文此处引用的示例未被保留)

@RonakShah 这正是我想要得到的结果,感谢你的尝试。我已经试过了,但它给出的仍然是基于分组(即 `t5_pass_fail`)的百分比。我只需要找到一种方法,先得到该 segment 的 N,然后再按细分去除以它。

@mdutton27 如果没有数据,很难给出答案。我认为你分享的数据是 `Task5_Strict` 的数据。我们需要 `cleanuxq4` 的数据,才能确认答案是否符合你的预期。正如 stefan 所说,你可以尝试从 `group_by` 中去掉 `t5_pass_fail`。

也许我做错了,但我确实在问题中给出了 dput() 的输出;如果这不是共享数据的正确方式,请告诉我,以便以后改进。不过,在你和 Stefan 的帮助下,我刚刚找到了答案:我需要在分组之外求出总和,然后用分组内的 N 除以它,得到正确的百分比。这个办法并不漂亮,但你们两位都帮了忙,谢谢。