R 重新编码调查多项选择题的输出

R 重新编码调查多项选择题的输出,r,multiple-choice,limesurvey,R,Multiple Choice,Limesurvey,我用limesurvey进行了一次调查,并将结果导出为csv.-文件,然后导入R 其中一个问题是多项选择题,参与者可以在其中说出他们研究的主题。limesurvey的输出看起来有点像这样(但是有更多的主题和更多的参与者): 我想得到一个像这样的结果 Participant | Subject 1 | Subject 2| Subject 3 | 1 | Maths | English | | 2 |

我用 LimeSurvey 进行了一次调查,将结果导出为 CSV 文件,然后导入 R。

其中一个问题是多项选择题,参与者可以在其中说出他们研究的主题。limesurvey的输出看起来有点像这样(但是有更多的主题和更多的参与者):

我想得到一个像这样的结果

Participant | Subject 1 | Subject 2| Subject 3  |
1           |   Maths   | English  |            |         
2           |   Physics | English  |            |         
3           |   Maths   | Physics  | Biology    |        
# Find the (row, col) position of every "Y" cell in the data frame.
wh <- which(dtf == "Y", arr.ind = TRUE)
# Group the column positions by row and translate them to column names.
tapply(
  X = wh[, "col"],
  INDEX = wh[, "row"],
  FUN = function(col_idx) colnames(dtf)[col_idx]
)
# $`1`
# [1] "Physics"   "Chemistry"

# $`2`
# [1] "Physics" "Math"    "Biology"

# $`3`
# [1] "Math"

# $`4`
# [1] "Math"    "Biology"

如果有人给我指点,我将不胜感激

我对这类数据整理(data wrangling)有些生疏,但这里有一个建议

首先,假设您的数据为以下形式:

# Example survey export: one row per participant, one "Y"/"N" column per
# subject. Participant ids are stored as character strings.
dtf <- data.frame(
  Participant = c("1", "2", "3", "4"),
  Physics = c("Y", "Y", "N", "N"),
  Chemistry = c("Y", "N", "N", "N"),
  Math = c("N", "Y", "Y", "Y"),
  Biology = c("N", "Y", "N", "Y"),
  stringsAsFactors = FALSE
)

我对这类数据整理(data wrangling)有些生疏,但这里有一个建议

首先,假设您的数据为以下形式:

# Example survey export: one row per participant, one "Y"/"N" column per
# subject. Participant ids are stored as character strings.
dtf <- data.frame(
  Participant = c("1", "2", "3", "4"),
  Physics = c("Y", "Y", "N", "N"),
  Chemistry = c("Y", "N", "N", "N"),
  Math = c("N", "Y", "Y", "Y"),
  Biology = c("N", "Y", "N", "Y"),
  stringsAsFactors = FALSE
)

下面是我根据请求生成预期数据帧的尝试:

library(tidyverse)
library(gtools)
# Every subject cell is drawn from these two values: "Y" or NA.
rand_list <- c("Y", NA)

# Simulated answers for ten participants, numbered 1 to 10
# (R counts from 1, not from 0).
df <- data.frame(
  participant = seq(1, 10, by = 1),
  Maths = sample(rand_list, 10, replace = TRUE),
  Physics = sample(rand_list, 10, replace = TRUE),
  English = sample(rand_list, 10, replace = TRUE),
  Biology = sample(rand_list, 10, replace = TRUE)
)

#' Recode "Y" answers to subject names and give the columns generic names
#'
#' The first column is left untouched. In every other column each "Y" is
#' replaced by the original name of that column; all other values, including
#' NA, are kept. The columns are then renamed to "participant", "Subject 1",
#' "Subject 2", ...
#'
#' @param data A data frame whose first column identifies the participant and
#'   whose remaining columns are character or factor indicator columns.
#' @return `data` with the recoded values and the generic column names.
df_to_new_format <- function(data) {
  vector_subject <- colnames(data)
  n_cols <- length(vector_subject)

  # Build the new header in one step instead of growing a vector in a loop.
  vector_new_col <- character(n_cols)
  if (n_cols >= 1L) {
    vector_new_col[1L] <- "participant"
  }
  if (n_cols >= 2L) {
    vector_new_col[-1L] <- paste("Subject", seq_len(n_cols - 1L))
  }

  # seq_len(n)[-1] is empty for n <= 1, so inputs with zero or one column
  # are handled (1:length(x) would iterate over c(1, 0) for empty input).
  for (j in seq_len(n_cols)[-1L]) {
    column <- data[[j]]
    if (is.factor(column)) {
      # Rename the level so the factor stays valid.
      levels(column)[levels(column) == "Y"] <- vector_subject[j]
    } else {
      # NA == "Y" is NA, so mask missing values explicitly.
      is_yes <- !is.na(column) & column == "Y"
      column[is_yes] <- vector_subject[j]
    }
    data[[j]] <- column
  }

  colnames(data) <- vector_new_col
  data
}

# Replace every "Y" by its subject name and switch to the generic header.
df <- df_to_new_format(data = df)

# One output row per participant. Preallocating avoids growing the result
# with rbind() inside the loop and gives a 0-row result for empty input.
df_new_format <- matrix(NA_character_, nrow = nrow(df), ncol = ncol(df))

for (m in seq_len(nrow(df))) {
  # as.matrix() turns the single row into one vector of values, which
  # mixedsort() then reorders.
  # NOTE(review): presumably the id ends up first and the NAs last
  # (mixedsort's default NA handling) - confirm against the gtools docs.
  temp <- mixedsort(as.matrix(df[m, ]))
  print(temp)
  df_new_format[m, ] <- temp
}

# The matrix has no dimnames, so the data frame gets default row names;
# the previous `row.names = FALSE` produced the row name "FALSE" for one row.
df_new_format <- as.data.frame(df_new_format)
colnames(df_new_format) <- colnames(df)
库(tidyverse)
图书馆(gtools)
随机列表=c('Y',NA)
df=data.frame(参与者=seq(1,10,by=1),#r从0开始计数
数学=样本(随机列表,10,替换=真),
物理=样本(随机列表,10,替换=真),
英语=样本(随机列表,10,替换=真),
生物学=样本(随机列表,10,替换=真))
df_至_新_格式=函数(数据){
vector_subject=colnames(数据)
向量_new_col=c()
用于(i/1:长度(向量_主题)){
如果(i==1){
新列=‘参与者’

vector_new_col以下是我根据请求生成预期数据帧的尝试:

library(tidyverse)
library(gtools)
# Every subject cell is drawn from these two values: "Y" or NA.
rand_list <- c("Y", NA)

# Simulated answers for ten participants, numbered 1 to 10
# (R counts from 1, not from 0).
df <- data.frame(
  participant = seq(1, 10, by = 1),
  Maths = sample(rand_list, 10, replace = TRUE),
  Physics = sample(rand_list, 10, replace = TRUE),
  English = sample(rand_list, 10, replace = TRUE),
  Biology = sample(rand_list, 10, replace = TRUE)
)

#' Recode "Y" answers to subject names and give the columns generic names
#'
#' The first column is left untouched. In every other column each "Y" is
#' replaced by the original name of that column; all other values, including
#' NA, are kept. The columns are then renamed to "participant", "Subject 1",
#' "Subject 2", ...
#'
#' @param data A data frame whose first column identifies the participant and
#'   whose remaining columns are character or factor indicator columns.
#' @return `data` with the recoded values and the generic column names.
df_to_new_format <- function(data) {
  vector_subject <- colnames(data)
  n_cols <- length(vector_subject)

  # Build the new header in one step instead of growing a vector in a loop.
  vector_new_col <- character(n_cols)
  if (n_cols >= 1L) {
    vector_new_col[1L] <- "participant"
  }
  if (n_cols >= 2L) {
    vector_new_col[-1L] <- paste("Subject", seq_len(n_cols - 1L))
  }

  # seq_len(n)[-1] is empty for n <= 1, so inputs with zero or one column
  # are handled (1:length(x) would iterate over c(1, 0) for empty input).
  for (j in seq_len(n_cols)[-1L]) {
    column <- data[[j]]
    if (is.factor(column)) {
      # Rename the level so the factor stays valid.
      levels(column)[levels(column) == "Y"] <- vector_subject[j]
    } else {
      # NA == "Y" is NA, so mask missing values explicitly.
      is_yes <- !is.na(column) & column == "Y"
      column[is_yes] <- vector_subject[j]
    }
    data[[j]] <- column
  }

  colnames(data) <- vector_new_col
  data
}

# Replace every "Y" by its subject name and switch to the generic header.
df <- df_to_new_format(data = df)

# One output row per participant. Preallocating avoids growing the result
# with rbind() inside the loop and gives a 0-row result for empty input.
df_new_format <- matrix(NA_character_, nrow = nrow(df), ncol = ncol(df))

for (m in seq_len(nrow(df))) {
  # as.matrix() turns the single row into one vector of values, which
  # mixedsort() then reorders.
  # NOTE(review): presumably the id ends up first and the NAs last
  # (mixedsort's default NA handling) - confirm against the gtools docs.
  temp <- mixedsort(as.matrix(df[m, ]))
  print(temp)
  df_new_format[m, ] <- temp
}

# The matrix has no dimnames, so the data frame gets default row names;
# the previous `row.names = FALSE` produced the row name "FALSE" for one row.
df_new_format <- as.data.frame(df_new_format)
colnames(df_new_format) <- colnames(df)
库(tidyverse)
图书馆(gtools)
随机列表=c('Y',NA)
df=data.frame(参与者=seq(1,10,by=1),#r从0开始计数
数学=样本(随机列表,10,替换=真),
物理=样本(随机列表,10,替换=真),
英语=样本(随机列表,10,替换=真),
生物学=样本(随机列表,10,替换=真))
df_至_新_格式=函数(数据){
vector_subject=colnames(数据)
向量_new_col=c()
用于(i/1:长度(向量_主题)){
如果(i==1){
新列=‘参与者’


vector_new_col“…看起来有点像这样…”,如果您向我们展示它到底是怎样的,那么帮助您会容易得多。请尝试
dput(head(您的_数据))
,或者类似的东西。我可能不够清楚,抱歉!我的文件看起来与我给出的示例完全一样,只是有更多的主题和更多的参与者,我不想粘贴在这里。导入到R时是这样的吗?是的,它是一个类似于此的数据帧如果它看起来不是data.frame,它就是这样的。“…看起来有点像这样…”,如果您能准确地向我们展示它是怎样的,那么帮助您会容易得多。请尝试
dput(head(您的_数据))
,或者类似的东西。我可能不够清楚,抱歉!我的文件看起来和我给出的示例完全一样,只是有更多的主题和更多的参与者,我不想粘贴在这里。导入到R时是这样的吗?是的,它是一个类似于此的数据帧如果它看起来不是data.frame,那么它就是一个data.frameks很有魅力,谢谢!我刚刚注意到,它不断创建尽可能多的列,因为有df_new_格式的主题(不需要,只是用NAs填充),但我可以很容易地摆脱它们。是的,这是一般情况下,如果有额外的主题。很高兴它有帮助!这就像一个魅力,谢谢你!我只是注意到,它不断创造尽可能多的列有主题在df_新_格式(这是不需要的,只是充满了NAs),但我可以很容易地摆脱它们。是的,这是一般情况下的情况,前提是有额外的主题。很高兴它有帮助!感谢您花时间帮助我!我发现kon_u的答案对我的用例更有效,但也可能是因为我对数据的描述不够精确。@SamVimes:很高兴听到。Y是的,输入数据和预期输出的a使猜测保持在最低限度。感谢您花时间来帮助我!我发现kon_的答案对我的用例更有效,但这也可能是因为我对数据的描述不够精确。@SamVimes:很高兴听到。是的,输入数据和预期都有aed输出将猜测保持在最低限度。
library(tidyverse)
library(gtools)
# Every subject cell is drawn from these two values: "Y" or NA.
rand_list <- c("Y", NA)

# Simulated answers for ten participants, numbered 1 to 10
# (R counts from 1, not from 0).
df <- data.frame(
  participant = seq(1, 10, by = 1),
  Maths = sample(rand_list, 10, replace = TRUE),
  Physics = sample(rand_list, 10, replace = TRUE),
  English = sample(rand_list, 10, replace = TRUE),
  Biology = sample(rand_list, 10, replace = TRUE)
)

#' Recode "Y" answers to subject names and give the columns generic names
#'
#' The first column is left untouched. In every other column each "Y" is
#' replaced by the original name of that column; all other values, including
#' NA, are kept. The columns are then renamed to "participant", "Subject 1",
#' "Subject 2", ...
#'
#' @param data A data frame whose first column identifies the participant and
#'   whose remaining columns are character or factor indicator columns.
#' @return `data` with the recoded values and the generic column names.
df_to_new_format <- function(data) {
  vector_subject <- colnames(data)
  n_cols <- length(vector_subject)

  # Build the new header in one step instead of growing a vector in a loop.
  vector_new_col <- character(n_cols)
  if (n_cols >= 1L) {
    vector_new_col[1L] <- "participant"
  }
  if (n_cols >= 2L) {
    vector_new_col[-1L] <- paste("Subject", seq_len(n_cols - 1L))
  }

  # seq_len(n)[-1] is empty for n <= 1, so inputs with zero or one column
  # are handled (1:length(x) would iterate over c(1, 0) for empty input).
  for (j in seq_len(n_cols)[-1L]) {
    column <- data[[j]]
    if (is.factor(column)) {
      # Rename the level so the factor stays valid.
      levels(column)[levels(column) == "Y"] <- vector_subject[j]
    } else {
      # NA == "Y" is NA, so mask missing values explicitly.
      is_yes <- !is.na(column) & column == "Y"
      column[is_yes] <- vector_subject[j]
    }
    data[[j]] <- column
  }

  colnames(data) <- vector_new_col
  data
}

# Replace every "Y" by its subject name and switch to the generic header.
df <- df_to_new_format(data = df)

# One output row per participant. Preallocating avoids growing the result
# with rbind() inside the loop and gives a 0-row result for empty input.
df_new_format <- matrix(NA_character_, nrow = nrow(df), ncol = ncol(df))

for (m in seq_len(nrow(df))) {
  # as.matrix() turns the single row into one vector of values, which
  # mixedsort() then reorders.
  # NOTE(review): presumably the id ends up first and the NAs last
  # (mixedsort's default NA handling) - confirm against the gtools docs.
  temp <- mixedsort(as.matrix(df[m, ]))
  print(temp)
  df_new_format[m, ] <- temp
}

# The matrix has no dimnames, so the data frame gets default row names;
# the previous `row.names = FALSE` produced the row name "FALSE" for one row.
df_new_format <- as.data.frame(df_new_format)
colnames(df_new_format) <- colnames(df)