R 具有不同结构的2个数据帧的组合

R 具有不同结构的2个数据帧的组合,r,merge,R,Merge,我寻找一种有效的方法来组合这两个数据帧: 一个包含问题及其答案和相关要点(按行组织的问题) 答案库(tidyverse) tmp% 聚集(num_question,can_ans,-cand)%>%#将extract data.frame转换为长格式 left_join(answer,by=“num_question”)%%>%#将按问题编号提取的内容合并到单个data.frame中 mutate(correct=(can_ans==ans)+0)%>%#候选答案是否与正确答案相同(1=TRUE

我寻找一种有效的方法来组合这两个数据帧: 一个包含问题及其答案和相关要点(按行组织的问题)

答案
library(tidyverse)

tmp <- extract %>%
  gather(num_question, can_ans, -cand) %>% # 将 extract data.frame 转换为长格式
  left_join(answer, by = "num_question") %>% # 按问题编号将 extract 与 answer 合并为单个 data.frame
  mutate(correct = (can_ans == ans) + 0) %>% # 候选答案是否与正确答案相同(1 = TRUE,0 = FALSE)
  mutate(result = correct * point) # 将是否正确(1/0)乘以该题的分值

# 将 data.frame 转换为宽格式
tmp %>%
  select(cand, num_question, result) %>%
  spread(num_question, result)
#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0

我们可以使用 `dplyr` 和 `tidyr`:先用 `gather` 将数据转换为长格式,再按 `num_question` 和 `ans` 与 `answer` 进行 `left_join`,用 0 替换 `NA`,最后用 `spread` 将数据展开为宽格式。

library(dplyr)
library(tidyr)

# Score every candidate: reshape `extract` to long form, look up the points
# for each (question, answer) pair in `answer`, turn missing points into 0,
# then reshape back to one column per question.
extract %>%
  gather(key = "key", value = "value", -cand) %>%
  left_join(answer, by = c(key = "num_question", value = "ans")) %>%
  mutate(point = replace_na(point, 0)) %>%
  select(-value) %>%
  spread(key = key, value = point)

#  cand X01 X02 X03 X04
#1 can1   1   0   1 0.5
#2 can2   1   2   1 0.5
#3 can3   0   2   1 0.0

下面是一种使用 `stack` 和 `unstack` 的 base R 方法,即:

# Stack every column of `extract` except the first into long form; `stack()`
# yields the columns `values` (given answer) and `ind` (source column name).
d1 <- stack(extract[-1])

# Build an "<answer> <question>" key per row and look it up in `answer`;
# rows without a matching key get NA points.
d1$values <- with(d1, {
  given_keys <- paste(values, ind)
  known_keys <- paste(answer$ans, answer$num_question)
  answer$point[match(given_keys, known_keys)]
})

# Missing points count as zero.
d1$values[is.na(d1$values)] <- 0

# Back to wide form, with the candidate identifier as the first column.
cbind.data.frame(cand = extract$cand, unstack(d1))

#  cand X01 X02 X03 X04
#1 can1   1   0   1 0.5
#2 can2   1   2   1 0.5
#3 can3   0   2   1 0.0

这也可以在 base R 中使用 `apply` 完成:

提取[,-1]cand X01 X02 X03 X04
#> 1 can1   1   0   1 0.5
#> 2 can2   1   2   1 0.5
#> 3 can3   0   2   1 0.0
对于 `extract` 中的每一行(即每个候选人),使用 `match` 在两个 data.frame 之间匹配问题编号。如果给出的答案与正确答案一致,则返回对应的分值,否则返回零。


或者不使用 `apply`,仅使用向量化操作:

答案1 can1 0.5
#>2 can2 110.5
#>3 can3 0 2 1 0.0

在哪里可以找到 `stack` 函数?在 utils 包中吗?它是一个 base R 函数。
# Expected result: one row per candidate, one column of awarded points per
# question (X01..X04).
result_research <- data.frame(
  cand = paste0("can", 1:3),
  X01 = c(1, 1, 0),
  X02 = c(0, 2, 2),
  X03 = c(1, 1, 1),
  X04 = c(0.5, 0.5, 0)
)

   cand X01 X02 X03 X04
   can1   1   0   1 0.5
   can2   1   2   1 0.5
   can3   0   2   1 0.0
library(tidyverse)

# Long table with one row per (candidate, question): attach the `answer` row
# for that question, flag whether the candidate's answer equals `ans`
# (1 = match, 0 = no match), and award the question's points on a match.
tmp <- extract %>%
  gather(num_question, can_ans, -cand) %>%
  left_join(answer, by = "num_question") %>%
  mutate(
    correct = as.numeric(can_ans == ans),
    result = correct * point
  )

# Back to wide format: one row per candidate, one column per question.
tmp %>%
  select(cand, num_question, result) %>%
  spread(num_question, result)

#   cand X01 X02 X03 X04
# 1 can1   1   0   1 0.5
# 2 can2   1   2   1 0.5
# 3 can3   0   2   1 0.0
library(dplyr) 
library(reshape2) 

# Score every candidate with reshape2 + dplyr.
#
# Steps: melt `extract` to long form (one row per candidate/question), join
# the points from `answer` on the (question, answer) pair, count pairs without
# a match as 0 points, and cast back to one column per question.
#
# The key arguments are spelled out rather than left to partial matching
# (`id` -> `id.vars`) or guessing (join keys, `value.var`), so the pipeline
# does not depend on column order or on which columns happen to share names.
dataQA <- melt(extract, id.vars = "cand") %>%
  rename(num_question = variable, ans = value) %>%
  left_join(answer, by = c("num_question", "ans")) %>%
  mutate(point = ifelse(is.na(point), 0, point)) %>% # no match = wrong answer = 0
  select(cand, num_question, point) %>%
  dcast(cand ~ num_question, value.var = "point")
cand X01 X02 X03 X04
1 can1   1   0   1 0.5
2 can2   1   2   1 0.5
3 can3   0   2   1 0.0
library(dplyr)
library(tidyr)

# Long form -> look up points per (question, answer) pair -> missing points
# become 0 -> wide form with one column per question.
extract %>%
  gather("key", "value", -cand) %>%
  left_join(answer, by = c(key = "num_question", value = "ans")) %>%
  select(-value) %>%
  replace_na(list(point = 0)) %>%
  spread(key, point)

#  cand X01 X02 X03 X04
#1 can1   1   0   1 0.5
#2 can2   1   2   1 0.5
#3 can3   0   2   1 0.0
# Long form of all columns of `extract` except the first: `values` holds the
# given answer, `ind` the name of the column it came from.
d1 <- stack(extract[-1])

# Replace each given answer by the points of the matching
# "<answer> <question>" key in `answer` (NA when there is no such key).
d1$values <- answer$point[
  match(
    paste(d1$values, d1$ind),
    paste(answer$ans, answer$num_question)
  )
]

# Unmatched answers score zero.
d1$values[is.na(d1$values)] <- 0

# Rebuild the wide table and put the candidate identifier in front.
cbind.data.frame(cand = extract$cand, unstack(d1))

#  cand X01 X02 X03 X04
#1 can1   1   0   1 0.5
#2 can2   1   2   1 0.5
#3 can3   0   2   1 0.0