基于查找表用dplyr重新编码矩阵值

基于查找表用dplyr重新编码矩阵值,r,dplyr,recode,R,Dplyr,Recode,我有一个包含大量案例的矩阵,以及一份调查问卷的相应答案。下面的大幅简化示例(原始回答)包含5人对5项的回答。让我们假设这些是多项选择题,每个选项有4个可能的答案。如果未处理该项目,则该人员收到代码9 raw_responses <- data.frame('id' = 1:10, 'item_1' = sample(c(1:4,9), 10, replace = TRUE),

我有一个包含大量个案的矩阵,记录了一份调查问卷的相应答案。下面这个大幅简化的示例(`raw_responses`)包含10人对5个题目的回答。假设这些都是多项选择题,每题有4个可选答案。如果某人没有作答某个题目,则该题记为代码9。

# Simulated questionnaire data: 10 respondents (`id`) and five item columns
# (`item_1` ... `item_5`), each drawn with replacement from the answer options
# 1-4 plus the code 9. The draws are random, so call set.seed() beforehand if
# reproducible output is needed.
raw_responses <- data.frame(
  id = 1:10,
  setNames(
    # one independent draw of 10 answers per item, in item order
    replicate(5, sample(c(1:4, 9), 10, replace = TRUE), simplify = FALSE),
    paste0('item_', 1:5)
  )
)

raw_responses

将数据转换为长格式,与查找表 `design`(含 `item` 与 `key` 列)连接,重新编码取值,再转换回宽格式:

library(dplyr)
library(tidyr)

# Score every answer against the lookup table `design`:
#   99 = the raw answer is the code 9, 1 = the answer equals the item's key,
#   0 = anything else.
raw_responses %>%
  # one row per respondent/item pair; the answers go into the `value` column
  pivot_longer(cols = -id, names_to = 'item', values_to = 'value') %>%
  # attach each item's `key` from the lookup table
  left_join(design, by = 'item') %>%
  mutate(
    value = case_when(
      value == 9 ~ 99,  # tested first, so a 9 is never compared with the key
      value == key ~ 1,
      TRUE ~ 0
    )
  ) %>%
  select(-key) %>%
  # back to one row per respondent and one column per item
  pivot_wider(names_from = 'item', values_from = 'value')

# A tibble: 10 x 6
#      id item_1 item_2 item_3 item_4 item_5
#   <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1     1     99     99      1      0      0
# 2     2     99     99     99      0      0
# 3     3      1     99      0     99     99
# 4     4      0      1      1     99      1
# 5     5     99      0      1      0      1
# 6     6      0      1      0      0      1
# 7     7      0      0      0      1     99
# 8     8      1     99      0      0      0
# 9     9      0     99     99      0      1
#10    10     99      1     99      1      0
然而,要使这个答案(下文按位置逐列配对的 `map2_dfc` 写法)有效,我们需要确保 `raw_responses` 中各题目列的顺序与 `design$item` 的顺序一致。在本例中,它们的顺序已经一致;但在实际数据中,如果顺序不一致,可以通过以下操作使其一致:

# Put the item columns in the same order as the rows of the lookup table.
# Columns are selected by name through `design$item`; `design$key` holds the
# answer keys that the responses are compared with, so indexing with it would
# not pick columns by item name. The whole data frame is reindexed so that the
# column names move together with the data (assigning into `raw_responses[-1]`
# would leave the old names on the reordered values).
# as.character() guards against `item` being stored as a factor.
raw_responses <- raw_responses[c('id', as.character(design$item))]
raw_responses[-1]
library(dplyr)
library(tidyr)

# Long -> join -> recode -> wide, written as named steps. local() keeps the
# intermediate tables out of the calling environment; its value (the wide
# table) is what gets printed at top level.
local({
  long <- pivot_longer(raw_responses, cols = -id, names_to = 'item')
  # add each item's `key` from the lookup table `design`
  keyed <- left_join(long, design, by = 'item')
  # 99 for the raw code 9, 1 when the answer equals the key, 0 otherwise
  scored <- mutate(keyed, value = case_when(value == 9 ~ 99,
                                            value == key ~ 1,
                                            TRUE ~ 0))
  pivot_wider(select(scored, -key), names_from = 'item')
})

# A tibble: 10 x 6
#      id item_1 item_2 item_3 item_4 item_5
#   <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
# 1     1     99     99      1      0      0
# 2     2     99     99     99      0      0
# 3     3      1     99      0     99     99
# 4     4      0      1      1     99      1
# 5     5     99      0      1      0      1
# 6     6      0      1      0      0      1
# 7     7      0      0      0      1     99
# 8     8      1     99      0      0      0
# 9     9      0     99     99      0      1
#10    10     99      1     99      1      0
library(purrr)
# Score the response columns pairwise against `design$key`: the i-th column of
# `raw_responses[-1]` is paired with the i-th key, so both must be in the same
# item order. Results are column-bound into one data frame.
map2_dfc(
  raw_responses[-1],
  design$key,
  function(answers, correct) {
    case_when(
      answers == 9 ~ 99,      # raw code 9 is recoded to 99
      answers == correct ~ 1, # answer equals the key
      TRUE ~ 0
    )
  }
)
# Align the item columns with the row order of the lookup table before any
# positional pairing with `design$key`. Selection is by item name
# (`design$item`), not by `design$key`, which contains the answer keys rather
# than column names. Reindexing the full data frame carries the column names
# along with the data; as.character() covers the case where `item` is a factor.
raw_responses <- raw_responses[c('id', as.character(design$item))]