Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/81.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 如何使用应用于几列数据的条件创建新变量?_R_Function_Na_Recode - Fatal编程技术网

R 如何使用应用于几列数据的条件创建新变量?

R 如何使用应用于几列数据的条件创建新变量?,r,function,na,recode,R,Function,Na,Recode,我是一名R新手,目前在代码方面遇到了一些困难。基本上,我在数据集中有几个变量,其中包含个人经常参与的活动类型的信息(例如1=阅读,2=艺术和手工艺,3=园艺等) 一些模拟数据: df = data.frame(ID = c(1001, 1002, 1003, 1004, 1005,1006,1007,1008,1009,1010,1011), orig_1 = c('-7', '2','1','1','NA','2', '3','NA','NA','2',

我是一名R新手,目前在代码方面遇到了一些困难。基本上,我在数据集中有几个变量,其中包含个人经常参与的活动类型的信息(例如1=阅读,2=艺术和手工艺,3=园艺等)

一些模拟数据:

# Mock survey data: one row per respondent, up to three activity codes each
# (e.g. 1 = reading, 2 = arts and crafts, 3 = gardening).
# Note that missing answers are stored as the literal string "NA" here,
# not as a real missing value.
df <- data.frame(
  ID = c(1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011),
  orig_1 = c("-7", "2", "1", "1", "NA", "2", "3", "NA", "NA", "2", "2"),
  orig_2 = c("1", "1", "2", "1", "3", "2", "2", "3", "NA", "2", "2"),
  orig_3 = c("-7", "3", "NA", "1", "NA", "2", "NA", "1", "NA", "2", "2")
)
基于这些变量,我想创建新的变量,例如反映一个人是否参与某项特定活动(0=否,1=是)。我做的第一件事是把表示“不知道”的取值重新编码为 NA:


我非常感谢您对改进此代码的任何建议

首先,您需要使用真正的 `NA`。您现在使用的是 `'NA'`,这是一个字符串,与缺失值 `NA` 不同。我们可以这样解决这个问题:

# Swap every literal "NA" string in df for a real missing value.
df <- replace(df, df == "NA", NA)
然后,当 orig_1 至 orig_3 全部为 NA 时,把各活动列也设为 NA;矢量化的写法(如 @akrun 所建议):

# Rows with zero non-missing values in columns 2:4 get NA in columns 5:7.
df[rowSums(!is.na(df[2:4])) == 0, 5:7] <- NA
使用 dplyr:

library(dplyr)

# Optional first step: turn the "NA" strings into real missing values.
# df[df == "NA"] <- NA
df %>%
  mutate(
    # Each flag is 1 when any orig_* column holds that activity code;
    # case_when() falls through to 0 when the test is FALSE or NA.
    activity_1 = case_when(
      orig_1 == 1 | orig_2 == 1 | orig_3 == 1 ~ 1,
      TRUE ~ 0
    ),
    activity_2 = case_when(
      orig_1 == 2 | orig_2 == 2 | orig_3 == 2 ~ 1,
      TRUE ~ 0
    ),
    activity_3 = case_when(
      orig_1 == 3 | orig_2 == 3 | orig_3 == 3 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  # Blank out columns 5:7 (expected to be the activity_* flags) on rows where
  # all three answers are the string "NA". If the strings were converted to
  # real NAs first, test is.na(orig_1) & is.na(orig_2) & is.na(orig_3) instead.
  mutate_at(
    .vars = 5:7,
    .funs = list(
      ~ ifelse(orig_1 == "NA" & orig_2 == "NA" & orig_3 == "NA", NA, .)
    )
  )

用 `!` 进行“逻辑化”的写法很巧妙,谢谢 @akrun,已添加到答案中!
抱歉,为完整起见,我补充了 @jay.sf 处理 NA 的方式:可以事先把所有字符型的 "NA" 改为真正的 NA,也可以保留其字符形式……
# Set activity_1 to NA for respondents with no answer in any orig_* column.
# `x == NA` always evaluates to NA, so a mask built with `==` never selects a
# row; is.na() is the correct test for missing values.
# NOTE(review): this assumes the "NA" strings in orig_1..orig_3 have already
# been converted to real NAs (e.g. df[df == "NA"] <- NA) -- confirm.
df$activity_1[is.na(df$orig_1) & is.na(df$orig_2) & is.na(df$orig_3)] <- NA
     ID orig_1 orig_2 orig_3 activity_1 activity_2 activity_3
1  1001      NA      1    NA          1          0          0
2  1002      2      1     NA          1          1          0
3  1003      1      2     NA          1          1          0
4  1004      1      1      1          1          0          0
5  1005     NA      3     NA          0          0          1
6  1006      2      2      2          0          1          0
7  1007      3      2     NA          0          1          1
8  1008     NA     NA      1          1          0          0
9  1009     NA     NA     NA          NA         NA         NA
10 1010      2      2      2          0          1          0
11 1011      2      2      2          0          1          0
# Step 1: turn the literal string "NA" into a real missing value everywhere.
df <- replace(df, df == "NA", NA)

# Step 2: where columns 2:4 are all missing, blank out columns 5:7.
# Row-wise variant:
df[apply(is.na(df[2:4]), 1, all), 5:7] <- NA
# Vectorised variant (a zero count of non-missing answers negates to TRUE):
df[!rowSums(!is.na(df[, 2:4])), 5:7] <- NA

# Show the result.
df
#      ID orig_1 orig_2 orig_3 activity_1 activity_2 activity_3
# 1  1001     -7      1     -7          1          0          0
# 2  1002      2      1      3          1          1          1
# 3  1003      1      2   <NA>          1          1          0
# 4  1004      1      1      1          1          0          0
# 5  1005   <NA>      3   <NA>          0          0          1
# 6  1006      2      2      2          0          1          0
# 7  1007      3      2   <NA>          0          1          1
# 8  1008   <NA>      3      1          1          0          1
# 9  1009   <NA>   <NA>   <NA>         NA         NA         NA
# 10 1010      2      2      2          0          1          0
# 11 1011      2      2      2          0          1          0
# Snapshot of the example data as a data frame: ID, three factor columns with
# the recorded activity codes, and the three numeric activity_* flags.
# The factors contain both real NAs and the level "NA" (a literal string).
df <- local({
  # orig_1 and orig_3 share one level set; orig_2 has no "-7" level.
  levels_with_minus7 <- c("-7", "1", "2", "3", "NA")
  levels_plain <- c("1", "2", "3", "NA")

  data.frame(
    ID = c(1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011),
    orig_1 = factor(
      c(NA, "2", "1", "1", "NA", "2", "3", "NA", "NA", "2", "2"),
      levels = levels_with_minus7
    ),
    orig_2 = factor(
      c("1", "1", "2", "1", "3", "2", "2", "3", "NA", "2", "2"),
      levels = levels_plain
    ),
    orig_3 = factor(
      c(NA, "3", "NA", "1", "NA", "2", "NA", "1", "NA", "2", "2"),
      levels = levels_with_minus7
    ),
    activity_1 = c(1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0),
    activity_2 = c(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1),
    activity_3 = c(0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0)
  )
})
library(dplyr)

# Variant that leaves the "NA" strings as they are. To work with real
# missing values instead, run this first:
# df[df == "NA"] <- NA
df %>%
  mutate(
    # A flag becomes 1 as soon as one of the three answers equals the code;
    # any other outcome of the test (FALSE or NA) ends up as 0.
    activity_1 = case_when(
      orig_1 == 1 | orig_2 == 1 | orig_3 == 1 ~ 1,
      TRUE ~ 0
    ),
    activity_2 = case_when(
      orig_1 == 2 | orig_2 == 2 | orig_3 == 2 ~ 1,
      TRUE ~ 0
    ),
    activity_3 = case_when(
      orig_1 == 3 | orig_2 == 3 | orig_3 == 3 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  # Reset columns 5:7 (expected to be the activity_* flags) to NA on rows where
  # every answer is the string "NA". With real NAs the condition would be
  # is.na(orig_1) & is.na(orig_2) & is.na(orig_3).
  mutate_at(
    .vars = 5:7,
    .funs = list(
      ~ ifelse(orig_1 == "NA" & orig_2 == "NA" & orig_3 == "NA", NA, .)
    )
  )
# Variant that first converts the "NA" strings to real missing values and
# then derives the activity flags.
# replace() is used instead of na_if(., "NA"): dplyr >= 1.1.0 no longer
# accepts a whole data frame as the first argument of na_if(), while
# replace(df, df == "NA", NA) performs the same cell-wise recoding on any
# version.
df %>%
  replace(. == "NA", NA) %>%
  mutate(
    # A flag is 1 when any answer equals the activity code; a FALSE or NA
    # test falls through to 0.
    activity_1 = case_when(
      orig_1 == 1 | orig_2 == 1 | orig_3 == 1 ~ 1,
      TRUE ~ 0
    ),
    activity_2 = case_when(
      orig_1 == 2 | orig_2 == 2 | orig_3 == 2 ~ 1,
      TRUE ~ 0
    ),
    activity_3 = case_when(
      orig_1 == 3 | orig_2 == 3 | orig_3 == 3 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  # Rows with no answer at all get NA in columns 5:7 (expected to be the
  # activity_* flags) rather than 0.
  mutate_at(
    .vars = 5:7,
    .funs = list(
      ~ ifelse(is.na(orig_1) & is.na(orig_2) & is.na(orig_3), NA, .)
    )
  )

          ID orig_1 orig_2 orig_3 activity_1 activity_2 activity_3
1  1001     -7      1     -7          1          0          0
2  1002      2      1      3          1          1          1
3  1003      1      2     NA          1          1          0
4  1004      1      1      1          1          0          0
5  1005     NA      3     NA          0          0          1
6  1006      2      2      2          0          1          0
7  1007      3      2     NA          0          1          1
8  1008     NA      3      1          1          0          1
9  1009     NA     NA     NA         NA         NA         NA
10 1010      2      2      2          0          1          0
11 1011      2      2      2          0          1          0
>