在R中创建条件新变量

在R中创建条件新变量,r,dplyr,tidyr,R,Dplyr,Tidyr,我需要重新创建一个非常大的数据帧(900多个变量)的原始变量。 下面是我尝试做的一个例子: dat <- data.frame( id=c('user1','user2','user3'), agePanel1=c(20,25,32), agePanel2=c(21,NA,33), favColPanel1=c('blue','red','blue'), favColPanel2=c('red',NA,'red') ) id

我需要重新创建一个非常大的数据帧(900多个变量)的原始变量。 下面是我尝试做的一个例子:

# Example data: one row per user, with each variable recorded in two panels
# (columns named <variable>Panel1 and <variable>Panel2).
dat <- data.frame(
  id = c("user1", "user2", "user3"),
  agePanel1 = c(20, 25, 32),
  agePanel2 = c(21, NA, 33),
  favColPanel1 = c("blue", "red", "blue"),
  favColPanel2 = c("red", NA, "red")
)

     id      agePanel1 agePanel2 favColPanel1 favColPanel2
  1 user1        20        21         blue          red
  2 user2        25        NA          red           NA
  3 user3        32        33         blue          red
我开始尝试使用dplyr和tidyr:

# Keep the Panel1 age only for users whose Panel2 age is missing; everyone
# else gets NA. Columns are referenced by bare name so that they are looked up
# inside `dat` (the previous `test$agePanel2` pointed at an object that is not
# defined in this example).
mutate(dat, age = ifelse(is.na(agePanel2), agePanel1, NA))

我正在努力寻找一种方法来做一个循环或者一些可以自动化这个过程的东西

您可以循环遍历要操作的各组列的名称前缀,并对每一组列执行相同的操作:

# Variable stems; each stem has a "<stem>Panel1" and a "<stem>Panel2" column.
cols <- c("age", "favCol")
for (col in cols) {
  panel1 <- paste0(col, "Panel1")
  panel2 <- paste0(col, "Panel2")
  # Start the new column as a copy of the Panel1 values ...
  dat[[col]] <- dat[[panel1]]
  # ... then blank it wherever a Panel2 value is present.
  dat[[col]][!is.na(dat[[panel2]])] <- NA
  # Also blank the Panel1 value in rows where Panel2 is missing.
  dat[[panel1]][is.na(dat[[panel2]])] <- NA
}
dat
#      id agePanel1 agePanel2 favColPanel1 favColPanel2 age favCol
# 1 user1        20        21         blue          red  NA   <NA>
# 2 user2        NA        NA         <NA>         <NA>  25    red
# 3 user3        32        33         blue          red  NA   <NA>

您可以使用 Map:

 # For each Panel1/Panel2 column pair (columns 2 & 3, then 4 & 5), keep the
 # Panel1 value only in rows where at least one of the two panels is NA; all
 # other rows become NA.
 dat[c("age", "favcol")] <- Map(
   function(x, y) {
     n_missing <- rowSums(is.na(cbind(x, y)))
     # Indexing with NA yields NA, so rows with no missing panel are blanked.
     x[ifelse(n_missing == 0, NA, seq_along(x))]
   },
   dat[c(2, 4)],
   dat[c(3, 5)]
 )
 # Blank all four panel columns in every row that has a missing panel value.
 dat[rowSums(is.na(dat[2:5])) > 0, 2:5] <- NA
 dat
 #    id agePanel1 agePanel2 favColPanel1 favColPanel2 age favcol
 #1 user1        20        21         blue          red  NA   <NA>
 #2 user2        NA        NA         <NA>         <NA>  25    red
 #3 user3        32        33         blue          red  NA   <NA>

这并不是针对您最初问题的最直接的解决方案。但在我看来,将数据保存为长格式是更可取的做法:这样一来,您想要做的操作(以及大多数其他操作)都会容易得多。

# required packages
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(dplyr)
library(tidyr)
# get data in long format: one row per id/panel, one column per variable
dat_long <- dat %>%
  # stack every column except id into key/value pairs
  gather(key, value, -id) %>%
  # split e.g. "agePanel1" into key = "age" and panel = "1"
  separate(key, c("key", "panel"), sep = "Panel") %>%
  # one column per variable again; convert = TRUE re-infers column types
  spread(key, value, convert = TRUE) %>%
  arrange(id, panel) %>%
  # left grouped by id so later steps operate per user
  group_by(id)
dat_long
## Source: local data frame [6 x 4]
## Groups: id
## 
##      id panel age favCol
## 1 user1     1  20   blue
## 2 user1     2  21    red
## 3 user2     1  25    red
## 4 user2     2  NA     NA
## 5 user3     1  32   blue
## 6 user3     2  33    red

# function that does the desired operation
#
# x: the values of one variable for a single id; x[1] is expected to be the
#    panel-1 value and x[2] the panel-2 value.
# Returns x[1] when the panel-2 value is missing, otherwise a missing value of
# the same type as x.
panel_fct <- function(x) {
  # Plain if/else instead of ifelse(): the condition is a scalar, and ifelse()
  # would strip attributes (e.g. return a factor's integer code).
  if (is.na(x[2])) {
    x[1]
  } else {
    # Indexing with NA gives a missing value that keeps x's type and class.
    x[NA_integer_]
  }
}
# use summarise_each to collapse each id group to one row, applying panel_fct
# to every column except panel
dat_long %>%
  summarise_each(funs(panel_fct), -panel)
## Source: local data frame [3 x 3]
##
##      id age favCol
## 1 user1  NA     NA
## 2 user2  25    red
## 3 user3  NA     NA

我觉得你已经有了这个:dat$age
谢谢你的回答,这非常有帮助。很抱歉回复晚了,最近几天有点忙 ;)
# required packages
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(dplyr)
library(tidyr)
# get data in long format: one row per id/panel, one column per variable
dat_long <- dat %>%
  # stack every column except id into key/value pairs
  gather(key, value, -id) %>%
  # split e.g. "agePanel1" into key = "age" and panel = "1"
  separate(key, c("key", "panel"), sep = "Panel") %>%
  # one column per variable again; convert = TRUE re-infers column types
  spread(key, value, convert = TRUE) %>%
  arrange(id, panel) %>%
  # left grouped by id so later steps operate per user
  group_by(id)
dat_long
## Source: local data frame [6 x 4]
## Groups: id
## 
##      id panel age favCol
## 1 user1     1  20   blue
## 2 user1     2  21    red
## 3 user2     1  25    red
## 4 user2     2  NA     NA
## 5 user3     1  32   blue
## 6 user3     2  33    red

# function that does the desired operation
#
# x: the values of one variable for a single id; x[1] is expected to be the
#    panel-1 value and x[2] the panel-2 value.
# Returns x[1] when the panel-2 value is missing, otherwise a missing value of
# the same type as x.
panel_fct <- function(x) {
  # Plain if/else instead of ifelse(): the condition is a scalar, and ifelse()
  # would strip attributes (e.g. return a factor's integer code).
  if (is.na(x[2])) {
    x[1]
  } else {
    # Indexing with NA gives a missing value that keeps x's type and class.
    x[NA_integer_]
  }
}
# use summarise_each to collapse each id group to one row, applying panel_fct
# to every column except panel
dat_long %>%
  summarise_each(funs(panel_fct), -panel)
## Source: local data frame [3 x 3]
##
##      id age favCol
## 1 user1  NA     NA
## 2 user2  25    red
## 3 user3  NA     NA
# same per-id summary, with the columns of dat joined on by id
dat_long %>%
  summarise_each(funs(panel_fct), -panel) %>%
  left_join(dat, by = "id")
## Source: local data frame [3 x 7]
## 
##      id age favCol agePanel1 agePanel2 favColPanel1 favColPanel2
## 1 user1  NA     NA        20        21         blue          red
## 2 user2  25    red        25        NA          red           NA
## 3 user3  NA     NA        32        33         blue          red