R中的宽帧子集

R中的宽帧子集,r,duplicates,subset,R,Duplicates,Subset,我有一个大型数据集,其中包含以下场景: df <- structure(list(Variable =c("A", "A", "A", "B", "B", "B", "C", "C", "C"), Day1=c("1", "1", "-1", "1", "1", "-1", "1", "1", "1"), Day2=c("1", "1", "-1", "1", "1", "-1", "1", "1", "1"), Day3=c("1", "1", "1", "1", "1", "-

我有一个大型数据集,其中包含以下场景:

# Example data: three rows for each of the variables A, B and C, with one
# character column per day holding either "1" or "-1".
df <- data.frame(
  Variable = rep(c("A", "B", "C"), each = 3),
  Day1 = c("1", "1", "-1", "1", "1", "-1", "1", "1", "1"),
  Day2 = c("1", "1", "-1", "1", "1", "-1", "1", "1", "1"),
  Day3 = c("1", "1", "1", "1", "1", "-1", "1", "1", "1"),
  Day4 = c("1", "1", "1", "1", "1", "1", "1", "1", "-1"),
  Day5 = c("1", "1", "1", "1", "1", "1", "1", "1", "-1"),
  stringsAsFactors = FALSE
)
df

这里有一个解决方案:

library(dplyr)

#' Count the -1 entries per row across selected day columns
#'
#' @param .df A data frame containing a column `Day<k>` for every `k` in `n`.
#' @param n Vector of day numbers; each one is appended to "Day" to build the
#'   column name to inspect.
#' @return A numeric vector with one count per row of `.df`.
sum_days <- function(.df, n) {
  vars <- paste0("Day", n)
  # Skip missing values instead of letting a single NA turn the whole row
  # count into NA, which would then leak into every comparison made on it.
  rowSums(.df[vars] == -1, na.rm = TRUE)
}

# Flag every row against the three subset criteria, then spread each flag over
# its Variable group so that one matching row selects the whole group:
#   DF1 - Day1 and Day2 are both -1
#   DF2 - Day3 is -1
#   DF3 - at least two of Day1..Day5 are -1
grouped_df <- df %>%
  mutate(
    DF1 = sum_days(., 1:2) >= 2,
    DF2 = sum_days(., 3) == 1,
    DF3 = sum_days(., 1:5) >= 2
  ) %>%
  group_by(Variable) %>%
  # across() replaces the superseded mutate_at(); `any` sets a flag to TRUE for
  # all rows of a group as soon as one row of that group is flagged.
  mutate(across(starts_with("DF"), any)) %>%
  ungroup()

如果您不熟悉管道操作符(`%>%`),这是一种将代码线性化的写法:它调用下一个函数,并把上一个函数的结果作为其第一个参数传入。

在第三个 df 中,为什么不包括以下内容?
# Sample rows for Variable "B": the third row is -1 on Day1 through Day3.
df <- data.frame(
  Variable = rep("B", 3),
  Day1 = c("1", "1", "-1"),
  Day2 = c("1", "1", "-1"),
  Day3 = c("1", "1", "-1"),
  Day4 = c("1", "1", "1"),
  Day5 = c("1", "1", "1"),
  stringsAsFactors = FALSE
)
# Sample rows for Variable "C": the third row is -1 on Day4 and Day5.
# NOTE(review): the last Day1 value is "-", which is neither "1" nor "-1";
# it looks like a typo, but the intended value is not confirmed - verify.
df <- data.frame(
  Variable = rep("C", 3),
  Day1 = c("1", "1", "-"),
  Day2 = c("1", "1", "1"),
  Day3 = c("1", "1", "1"),
  Day4 = c("1", "1", "-1"),
  Day5 = c("1", "1", "-1"),
  stringsAsFactors = FALSE
)
library(dplyr)

#' Count the -1 entries per row across selected day columns
#'
#' @param .df A data frame containing a column `Day<k>` for every `k` in `n`.
#' @param n Vector of day numbers; each one is appended to "Day" to build the
#'   column name to inspect.
#' @return A numeric vector with one count per row of `.df`.
sum_days <- function(.df, n) {
  vars <- paste0("Day", n)
  # Skip missing values instead of letting a single NA turn the whole row
  # count into NA, which would then leak into every comparison made on it.
  rowSums(.df[vars] == -1, na.rm = TRUE)
}

# Flag every row against the three subset criteria, then spread each flag over
# its Variable group so that one matching row selects the whole group:
#   DF1 - Day1 and Day2 are both -1
#   DF2 - Day3 is -1
#   DF3 - at least two of Day1..Day5 are -1
grouped_df <- df %>%
  mutate(
    DF1 = sum_days(., 1:2) >= 2,
    DF2 = sum_days(., 3) == 1,
    DF3 = sum_days(., 1:5) >= 2
  ) %>%
  group_by(Variable) %>%
  # across() replaces the superseded mutate_at(); `any` sets a flag to TRUE for
  # all rows of a group as soon as one row of that group is flagged.
  mutate(across(starts_with("DF"), any)) %>%
  ungroup()
# Build the three result sets by keeping the rows whose DF1 / DF2 / DF3
# indicator column is TRUE.
df1 <- grouped_df[grouped_df[["DF1"]], ]
df2 <- grouped_df[grouped_df[["DF2"]], ]
df3 <- grouped_df[grouped_df[["DF3"]], ]