R 具有不同结构的2个数据帧的组合
我在寻找一种高效的方法来合并这两个数据帧:一个包含问题、正确答案及对应分值(每行一个问题)。R 具有不同结构的2个数据帧的组合,r,merge,R,Merge,我在寻找一种高效的方法来合并这两个数据帧:一个包含问题、正确答案及对应分值(每行一个问题)。 library(tidyverse) tmp <- extract %>% gather(num_question, can_ans, -cand) %>% # 将 extract 数据帧转换为长格式 left_join(answer, by = "num_question") %>% # 按问题编号把 extract 与 answer 合并为一个数据帧 mutate(correct = (can_ans == ans) + 0) %>% # 考生答案是否与正确答案相同(1 = TRUE
library(tidyverse)
tmp <- extract %>%
  gather(num_question, can_ans, -cand) %>% # 将 extract 数据帧转换为长格式
  left_join(answer, by = "num_question") %>% # 按问题编号把 extract 与 answer 合并为一个数据帧
  mutate(correct = (can_ans == ans) + 0) %>% # 考生答案是否与正确答案相同(1 = TRUE,0 = FALSE)
  mutate(result = correct * point) # 将是否正确(1 或 0)乘以该题的分值
# 将数据帧转换为宽格式
tmp %>%
  select(cand, num_question, result) %>%
  spread(num_question, result)
#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0
我们可以使用 dplyr 和 tidyr:先用 gather 把数据转换为长格式,再按 num_question 和 ans 进行连接(join),用 0 替换 NA,最后用 spread 把数据展开为宽格式
library(dplyr)
library(tidyr)
# Score every candidate's answers: the result has one row per candidate and
# one column of points per question.
local({
  # Long form: one row per (candidate, question). `key` holds the question
  # column name and `value` the answer that candidate gave.
  long_answers <- gather(extract, key, value, -cand)
  # Join on question AND answer together, so only rows whose answer equals
  # the answer key pick up a `point`; every other row is left as NA.
  scored <- left_join(
    long_answers,
    answer,
    by = c("key" = "num_question", "value" = "ans")
  )
  # Unmatched (wrong) answers are worth zero points.
  scored <- replace_na(scored, list(point = 0))
  # Drop the raw answers and go back to one column per question.
  spread(select(scored, -value), key, point)
})
#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0
下面是一种使用 stack 和 unstack 的 base R 方法,即
# Base R version: score each candidate's answers without any packages.
# Long form: `values` holds the given answer, `ind` the question column name.
d1 <- stack(extract[-1])
# Build an "<answer> <question>" key for every row and look it up in the
# answer key; a miss (wrong answer) yields NA.
d1$values <- answer$point[
  match(do.call(paste, d1), paste(answer$ans, answer$num_question))
]
# Wrong answers are worth zero points.
d1$values[is.na(d1$values)] <- 0
# Back to one column per question. split() is used instead of unstack()
# because unstack() collapses everything into a single column when there is
# only one candidate (i.e. exactly one value per question).
cbind.data.frame(
  cand = extract$cand,
  as.data.frame(split(d1$values, d1$ind))
)
#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0
这也可以用 base R 的 apply 来完成:
提取[,-1]cand X01 X02 X03 X04
#>1 can1 01 0.5
#>2 can2 110.5
#>3 can3 0 2 1 0.0
对于 extract 中的每一行(即每位考生),使用 match 在两个数据帧之间匹配问题编号。如果所给答案与正确答案一致,则返回对应的分值,否则返回 0。
或者不使用 apply,仅使用向量化操作:
答案1 can1 0.5
#>2 can2 110.5
#>3 can3 0 2 1 0.0
在哪里可以找到 stack 函数?在 utils 包中吗?——它是一个 base R 函数。
# Desired output table: one row per candidate, one numeric column per
# question (X01-X04).
result_research <- data.frame(
  cand = c("can1", "can2", "can3"),
  X01 = c(1, 1, 0),
  X02 = c(0, 2, 2),
  X03 = c(1, 1, 1),
  X04 = c(0.5, 0.5, 0)
)
# Printed form (kept as a comment so the snippet still parses as R):
#   cand X01 X02 X03 X04
#   can1   1   0   1 0.5
#   can2   1   2   1 0.5
#   can3   0   2   1 0.0
library(tidyverse)
# Long format: one row per (candidate, question), joined with that question's
# correct answer and point value, then scored.
tmp <- extract %>%
  gather(num_question, can_ans, -cand) %>% # extract in long format
  left_join(answer, by = "num_question") %>% # attach answer key per question
  mutate(
    # 1 when the candidate's answer equals the correct one, else 0. An
    # unanswered question (NA) compares as NA, so it is counted as wrong
    # instead of propagating NA into the score.
    correct = coalesce(can_ans == ans, FALSE) + 0,
    # Points earned: the question's points if correct, otherwise 0.
    result = correct * point
  )
# Turn the scored data back into wide format: one column per question.
tmp %>%
  select(cand, num_question, result) %>%
  spread(num_question, result)
#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0
library(dplyr)
library(reshape2)
# Score each candidate's answers with reshape2 + dplyr and keep the wide
# result (one row per candidate, one column of points per question).
dataQA <- melt(extract, id.vars = "cand") %>% # long format for extract
  rename(num_question = variable, ans = value) %>%
  # Match on question AND answer, so only correct answers receive a `point`.
  # Keys are spelled out rather than inferred; this assumes `num_question`
  # and `ans` are the only columns shared with `answer` -- TODO confirm.
  left_join(answer, by = c("num_question", "ans")) %>%
  mutate(point = ifelse(is.na(point), 0, point)) %>% # wrong answer = 0
  select(cand, num_question, point) %>%
  # value.var is explicit so dcast does not have to guess the value column.
  dcast(cand ~ num_question, value.var = "point") # back to wide format
#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0
library(dplyr)
library(tidyr)
# Compute the points each candidate earned per question and return them in
# wide form (one row per candidate, one column per question).
local({
  # One row per (candidate, question): `key` is the question column name,
  # `value` is the answer given.
  answers_long <- gather(extract, key, value, -cand)
  # Matching on both question and answer means a `point` is attached only
  # where the given answer is the correct one; other rows get NA.
  with_points <- left_join(
    answers_long,
    answer,
    by = c("key" = "num_question", "value" = "ans")
  )
  # Rows without a match are wrong answers and score zero.
  with_points <- replace_na(with_points, list(point = 0))
  # The raw answers are no longer needed; widen back to one column per
  # question.
  points_only <- select(with_points, -value)
  spread(points_only, key, point)
})
#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0
# Base R scoring of each candidate's answers (no packages required).
# Long form: `values` is the answer given, `ind` the question column name.
d1 <- stack(extract[-1])
# Look up each "<answer> <question>" pair in the answer key; pairs that are
# not in the key (wrong answers) come back as NA.
d1$values <- answer$point[
  match(do.call(paste, d1), paste(answer$ans, answer$num_question))
]
# Wrong answers score zero.
d1$values[is.na(d1$values)] <- 0
# Rebuild one column per question with split() rather than unstack():
# unstack() returns a single stacked column when every question has exactly
# one value, which happens when there is only one candidate.
cbind.data.frame(
  cand = extract$cand,
  as.data.frame(split(d1$values, d1$ind))
)
#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0