R:分组后计算的百分比不正确——如何得到占整个组的百分比
标签:r, dplyr
我已经搜索了网络和 Stack Overflow,也尝试了多种做法,但都没有成功。我有一个数据集,想做分组汇总计算,同时还要得到每个分组占其所属整体(segment)的百分比。下面是我目前得到的结果示例:
# dput() dump of the result table (presumably Task5_Strict_Pivot — confirm).
# Layout: one column per segment x t5_pass_fail group, so the column names
# repeat (Advanced, Advanced, Novice, Novice); one row per metric, labelled
# through row.names. Every cell is a character string, numeric metrics included.
# Note that the "SuccessRate" row holds "1" in all four columns, which is the
# symptom the question is about.
structure(list(Advanced = c("Task5", "Advanced", "0", "2", "1",
" 8.500000", "0.0", "3.191667", "25.00000", "4.500000", "0.6428571",
"4.50", "0.6428571", "4.500000", "0.6428571", "5.000000", "0.7142857",
"6.500000", "0.9285714", "2", "1.00", NA, NA), Advanced = c("Task5",
"Advanced", "1", "4", "1", "26.500000", "0.5", "9.037500", "63.25000",
"6.000000", "0.8571429", "4.75", "0.6785714", "5.250000", "0.7500000",
"6.000000", "0.8571429", "5.250000", "0.7500000", "1", "0.25",
"4.333333", "0.6190476"), Novice = c("Task5", "Novice", "0",
"2", "1", " 6.000000", "0.0", "4.850000", "49.00000", "6.500000",
"0.9285714", "6.00", "0.8571429", "6.000000", "0.8571429", "6.500000",
"0.9285714", "6.500000", "0.9285714", "2", "1.00", NA, NA), Novice = c("Task5",
"Novice", "1", "6", "1", " 7.666667", "1.0", "8.672222", "69.83333",
"5.333333", "0.7619048", "4.00", "0.5714286", "3.833333", "0.5476190",
"4.333333", "0.6190476", "4.166667", "0.5952381", "0", "0.00",
"4.000000", "0.5714286")), class = "data.frame", row.names = c("Task",
"segment", "t5_pass_fail", "N", "SuccessRate", "UniquePages",
"Timedout_percent", "TimeonTask", "Clicks", "Useful_raw", "Useful_percent",
"UserFriendly_raw", "UserFriendly_percent", "Learned_raw", "Learned_percent",
"Satisfied_raw", "Satisfied_percent", "Confident_raw", "Confident_percent",
"Experienced_Difficulty_raw", "Experienced_Difficulty_percent",
"difficulty_level_raw", "difficulty_level_percent"))
获取上述信息的过程如下所示:
# Question's original attempt: summarise the Task 5 metrics of cleanuxq4 into
# one row per segment x t5_pass_fail combination. Kept as posted because it
# reproduces the problem being asked about.
Task5_Strict <- cleanuxq4 %>%
# Keep the respondent id, the segment, and every column matched by
# starts_with("t").
dplyr:: select(("urespid_0"),("segment"),starts_with("t")) %>%
group_by (segment,t5_pass_fail)%>%
summarize(
Task = "Task5",
# Number of rows in the current segment x t5_pass_fail group.
N =n(),
# NOTE(review): this is the faulty line. `t5_pass_fail = 1` is an assignment,
# not the comparison `t5_pass_fail == 1`, so the numerator is the constant 1.
# Presumably the assignment also masks the column in the denominator; the
# posted output shows SuccessRate equal to 1 for every group. Even with `==`,
# t5_pass_fail is constant inside each group because it is a grouping variable.
SuccessRate = ((t5_pass_fail = 1)/sum(t5_pass_fail)),
UniquePages = (mean(t5_unique_pageviews)),
# Uses N defined above: sum of t5_effectiveness divided by the group size.
Timedout_percent = sum(t5_effectiveness)/N,
# NOTE(review): the /60 presumably converts seconds to minutes — confirm units.
TimeonTask = mean(t5_time_task)/60,
Clicks = mean(t5_clicks),
# Each *_percent below is the matching *_raw mean divided by 7.
# NOTE(review): 7 is presumably the top of a 7-point rating scale — confirm.
Useful_raw = mean(t5_useful),
Useful_percent = Useful_raw/7,
UserFriendly_raw = mean(t5_user_friendly),
UserFriendly_percent = UserFriendly_raw/7,
Learned_raw = mean(t5_learned),
Learned_percent = Learned_raw/7,
Satisfied_raw = mean(t5_satisfied),
Satisfied_percent = Satisfied_raw/7,
Confident_raw = mean(t5_confident),
Confident_percent = Confident_raw/7,
# Sum (not mean) of t5_exp_difficulty, then expressed per row of the group.
Experienced_Difficulty_raw = sum(t5_exp_difficulty),
Experienced_Difficulty_percent = Experienced_Difficulty_raw/N,
# Only this metric drops missing values before averaging.
difficulty_level_raw = mean(t5_difficulty_level, na.rm=TRUE),
difficulty_level_percent = difficulty_level_raw/7
)
# Move columns, then use the second row as the header of the pivot ----------
# Task and segment go first so that, after transposing, the segment labels
# end up in row 2 of the pivot.
Task5_Strict <- Task5_Strict %>%
  select("Task", "segment", everything())

# Transpose: metrics become rows, each segment x pass/fail group a column.
Task5_Strict_Pivot <- as.data.frame(t(Task5_Strict))

# Copy of the pivot without the segment row (row 2).
zTask5_Strict_Pivot <- Task5_Strict_Pivot[-2, ]

# Row 2 of a data frame is itself a one-row data frame. Flatten it to a plain
# character vector before using it as the header, so the labels are the
# segment names even when the transposed columns are factors.
colnames(Task5_Strict_Pivot) <- as.character(unlist(Task5_Strict_Pivot[2, ]))
Task5\u%
dplyr::select((“urespid_0”),(“段”),以(“t”)开头%>%
分组依据(分段,t5通过/失败)%>%
总结(
Task=“Task5”,
N=N(),
成功率=((t5通过\失败=1)/总和(t5通过\失败)),
唯一页面=(平均值(t5_唯一页面浏览量)),
Timedout\u百分比=总和(t5\u有效性)/N,
TimeonTask=平均值(t5时间任务)/60,
点击次数=平均值(t5_点击次数),
有用_原始=平均值(t5_有用),
有用百分比=有用原始值/7,
用户友好型=平均值(t5用户友好型),
用户友好型\u百分比=用户友好型\u原始/7,
已学习=平均值(t5已学习),
已学习百分比=已学习原始/7,
满意的原始=平均值(t5满意),
满意率=满意率/7,
自信度=平均值(t5自信度),
信心百分比=信心原始/7,
经验难度=总和(t5经验难度),
经验难度百分比=经验难度原始/N,
难度等级原始=平均值(t5难度等级,na.rm=真),
难度等级百分比=难度等级原始/7
)
#移动列n,然后将第二行用作轴列的标题--------
任务5_%
选择(“段”,所有内容())%>%
选择(“任务”,所有内容())
Task5_Strict_Pivot
要计算成功率,可以使用 mean(t5_pass_fail == 1) * 100:
library(dplyr)

# Summarise the Task 5 metrics of cleanuxq4 into one row per
# segment x t5_pass_fail group.
#
# SuccessRate is the percentage of rows in the whole segment with
# t5_pass_fail == 1. It has to be computed while the data is grouped by
# segment only: once the groups are also split by t5_pass_fail the flag is
# constant inside each group, so mean(t5_pass_fail == 1) could only be 0 or 1.
Task5_Strict <- cleanuxq4 %>%
  dplyr::select("urespid_0", "segment", starts_with("t")) %>%
  group_by(segment) %>%
  mutate(SuccessRate = mean(t5_pass_fail == 1) * 100) %>%
  group_by(segment, t5_pass_fail) %>%
  summarize(
    Task = "Task5",
    N = n(),
    # Already constant within a segment; take one value to carry it through.
    SuccessRate = first(SuccessRate),
    UniquePages = mean(t5_unique_pageviews),
    Timedout_percent = sum(t5_effectiveness) / N,
    # NOTE(review): /60 presumably converts seconds to minutes — confirm units.
    TimeonTask = mean(t5_time_task) / 60,
    Clicks = mean(t5_clicks),
    # *_percent = *_raw / 7.
    # NOTE(review): 7 is presumably the top of the rating scale — confirm.
    Useful_raw = mean(t5_useful),
    Useful_percent = Useful_raw / 7,
    UserFriendly_raw = mean(t5_user_friendly),
    UserFriendly_percent = UserFriendly_raw / 7,
    Learned_raw = mean(t5_learned),
    Learned_percent = Learned_raw / 7,
    Satisfied_raw = mean(t5_satisfied),
    Satisfied_percent = Satisfied_raw / 7,
    Confident_raw = mean(t5_confident),
    Confident_percent = Confident_raw / 7,
    Experienced_Difficulty_raw = sum(t5_exp_difficulty),
    Experienced_Difficulty_percent = Experienced_Difficulty_raw / N,
    # Only this metric drops missing values before averaging.
    difficulty_level_raw = mean(t5_difficulty_level, na.rm = TRUE),
    difficulty_level_percent = difficulty_level_raw / 7
  )
库(dplyr)
任务5_%
dplyr::select((“urespid_0”),(“段”),以(“t”)开头%>%
分组依据(分段,t5通过/失败)%>%
总结(
Task=“Task5”,
N=N(),
成功率=平均值(t5通过率=1)*100,
唯一页面=(平均值(t5_唯一页面浏览量)),
Timedout\u百分比=总和(t5\u有效性)/N,
TimeonTask=平均值(t5时间任务)/60,
点击次数=平均值(t5_点击次数),
有用_原始=平均值(t5_有用),
有用百分比=有用原始值/7,
用户友好型=平均值(t5用户友好型),
用户友好型\u百分比=用户友好型\u原始/7,
已学习=平均值(t5已学习),
已学习百分比=已学习原始/7,
满意的原始=平均值(t5满意),
满意率=满意率/7,
自信度=平均值(t5自信度),
信心百分比=信心原始/7,
经验难度=总和(t5经验难度),
经验难度百分比=经验难度原始/N,
难度等级原始=平均值(t5难度等级,na.rm=真),
难度等级百分比=难度等级原始/7
)
感谢 Stefan 和 Ronak。我原先的做法在逻辑上行不通:需要先在按 t5_pass_fail 的分组之外求出每个 segment 的总数,再用各分组的 N 除以这个总数,才能得到正确的百分比。
解决方案见下面代码的最后部分:
# Summarise the Task 5 metrics of cleanuxq4 into one row per
# segment x t5_pass_fail group, then express each row's N as a share of its
# segment.
Task5_Strict <- cleanuxq4 %>%
  dplyr::select("urespid_0", "segment", starts_with("t")) %>%
  group_by(segment, t5_pass_fail) %>%
  summarize(
    Task = "Task5",
    N = n(),
    UniquePages = mean(t5_unique_pageviews),
    Timedout_percent = sum(t5_effectiveness) / N,
    # NOTE(review): /60 presumably converts seconds to minutes — confirm units.
    TimeonTask = mean(t5_time_task) / 60,
    Clicks = mean(t5_clicks),
    # *_percent = *_raw / 7.
    # NOTE(review): 7 is presumably the top of the rating scale — confirm.
    Useful_raw = mean(t5_useful),
    Useful_percent = Useful_raw / 7,
    UserFriendly_raw = mean(t5_user_friendly),
    UserFriendly_percent = UserFriendly_raw / 7,
    Learned_raw = mean(t5_learned),
    Learned_percent = Learned_raw / 7,
    Satisfied_raw = mean(t5_satisfied),
    Satisfied_percent = Satisfied_raw / 7,
    Confident_raw = mean(t5_confident),
    Confident_percent = Confident_raw / 7,
    Experienced_Difficulty_raw = sum(t5_exp_difficulty),
    Experienced_Difficulty_percent = Experienced_Difficulty_raw / N,
    # Only this metric drops missing values before averaging.
    difficulty_level_raw = mean(t5_difficulty_level, na.rm = TRUE),
    difficulty_level_percent = difficulty_level_raw / 7
  ) %>%
  # The denominator must be the segment total, so group by segment explicitly
  # instead of relying on whatever grouping summarize() leaves behind.
  group_by(segment) %>%
  mutate(
    # Total number of rows in the segment (pass and fail groups together).
    zcount = sum(N),
    # Fraction (0-1) of the segment that falls into this pass/fail row.
    SuccessRate = N / zcount
  ) %>%
  # Drop the grouping so later steps do not silently operate per segment.
  ungroup()
Task5\u%
dplyr::select((“urespid_0”),(“段”),以(“t”)开头%>%
分组依据(分段,t5通过/失败)%>%
总结(
Task=“Task5”,
N=N(),
唯一页面=(平均值(t5_唯一页面浏览量)),
Timedout\u百分比=总和(t5\u有效性)/N,
TimeonTask=平均值(t5时间任务)/60,
点击次数=平均值(t5_点击次数),
有用_原始=平均值(t5_有用),
有用百分比=有用原始值/7,
用户友好型=平均值(t5用户友好型),
用户友好型\u百分比=用户友好型\u原始/7,
已学习=平均值(t5已学习),
已学习百分比=已学习原始/7,
满意的原始=平均值(t5满意),
满意率=满意率/7,
自信度=平均值(t5自信度),
信心百分比=信心原始/7,
经验难度=总和(t5经验难度),
经验难度百分比=经验难度原始/N,
难度等级原始=平均值(t5难度等级,na.rm=真),
难度等级百分比=难度等级原始/7
)
Task5_Strict
你确定要按 t5_pass_fail 分组吗?
@stefan 我按 t5_pass_fail 分组,是因为我需要比较通过和未通过任务的人之间的得分差异。如果有更好的方法,我愿意学习。理想情况下,我想要的结果类似于……
@RonakShah 这正是我想要得到的结果,感谢你的尝试。我已经试过了,但它给出的仍然是基于 t5_pass_fail 分组的百分比。我只需要找到一种方法,先得到该 segment 的 N,然后再用各细分组的 N 去除以它。
@mdutton27 如果没有数据,很难给出答案。我猜你分享的数据是 Task5_Strict 的;我们需要 cleanuxq4 的数据才能确认答案是否符合你的预期。正如 stefan 所说,你可以尝试从 group_by 中去掉 t5_pass_fail。
也许是我做错了,但我确实在问题里用 dput() 贴出了数据;如果这不是分享数据的正确方式,请告诉我,以便今后改进。不过,在你和 Stefan 的提示下,我刚刚找到了答案:我需要先在分组之外求出总和,再用各分组内的 N 去除以它,才能得到正确的百分比。做法虽然不算优雅,但你们两位都帮了大忙,谢谢。