R 对日期上字符串匹配的列求和

R 对日期上字符串匹配的列求和,r,dataframe,R,Dataframe,我有一个数据框df,其中包含ID变量和每日日期(格式XYYYMMDD)作为列标题: ID <- c(101,102,203,207,209) X20170101 <- c(1,NA,NA,2,1) X20170102 <- c(NA,1,1,1,NA) X20170103<-c(NA,NA,NA,2,1) X20170201<-c(NA,2,NA,NA,1) X20170202<-c(NA,1,1,NA,NA) X20170301<-c(NA,1,NA,

我有一个数据框df,其中包含ID变量和每日日期(格式XYYYMMDD)作为列标题:

ID <- c(101,102,203,207,209)
X20170101 <- c(1,NA,NA,2,1)
X20170102 <- c(NA,1,1,1,NA)
X20170103<-c(NA,NA,NA,2,1)
X20170201<-c(NA,2,NA,NA,1)
X20170202<-c(NA,1,1,NA,NA)
X20170301<-c(NA,1,NA,NA,NA)
df <- data.table(ID,X20170101,X20170102,X20170103,X20170201,X20170202,X20170301)

 ID X20170101 X20170102 X20170103 X20170201 X20170202 X20170301
101         1        NA        NA        NA        NA        NA
102        NA         1        NA         2         1         1
203        NA         1        NA        NA         1        NA
207         2         1         2        NA        NA        NA
209         1        NA         1         1        NA        NA
我的想法是避免重塑数据集的格式,并使用函数lappy和grepl部分匹配字符串,但我遗漏了一些东西

test = lapply(df, function(x) colSums(df[,grepl(x, names(df))]))

非常感谢。

您不想传播数据有什么原因吗

library(tidyverse)
want <- df %>%
          gather(key, value, -ID) %>%
          mutate(key = substr(key, 1, 7)) %>%
          group_by(ID, key) %>%
          summarise(value = sum(value, na.rm=TRUE)) %>%
          spread(key, value)

# A tibble: 5 x 4
# Groups:   ID [5]
     ID X201701 X201702 X201703
* <dbl>   <dbl>   <dbl>   <dbl>
1   101       1       0       0
2   102       1       3       1
3   203       1       1       0
4   207       5       0       0
5   209       2       1       0
库(tidyverse)
想要%
聚集(键,值,-ID)%%>%
变异(key=substr(key,1,7))%>%
分组依据(ID,键)%>%
总结(值=总和(值,na.rm=真))%>%
排列(键、值)
#一个tibble:5x4
#组别:ID[5]
ID X201701 X201702 X201703
*          
1   101       1       0       0
2   102       1       3       1
3   203       1       1       0
4   207       5       0       0
5   209       2       1       0

这里有一个使用
lubridate
包来解析日期和
拆分。默认值为
将data.frame基于同一个月划分为多个组

library(lubridate)
factors = sapply(ymd(gsub("X", "", names(df)[-1])), function(x)
    paste0(year(x), sprintf("%02d", as.integer(month(x)))))
data.frame(df[,1],
           lapply(split.default(df[,-1], factors), function(x)
               rowSums(x, na.rm = TRUE) * (NA^(rowSums(is.na(x)) == NCOL(x)))))
#   ID X201701 X201702 X201703
#1 101       1      NA      NA
#2 102       1       3       1
#3 203       1       1      NA
#4 207       5      NA      NA
#5 209       2       1      NA
library(tidyverse)
want <- df %>%
          gather(key, value, -ID) %>%
          mutate(key = substr(key, 1, 7)) %>%
          group_by(ID, key) %>%
          summarise(value = sum(value, na.rm=TRUE)) %>%
          spread(key, value)

# A tibble: 5 x 4
# Groups:   ID [5]
     ID X201701 X201702 X201703
* <dbl>   <dbl>   <dbl>   <dbl>
1   101       1       0       0
2   102       1       3       1
3   203       1       1       0
4   207       5       0       0
5   209       2       1       0
library(lubridate)
factors = sapply(ymd(gsub("X", "", names(df)[-1])), function(x)
    paste0(year(x), sprintf("%02d", as.integer(month(x)))))
data.frame(df[,1],
           lapply(split.default(df[,-1], factors), function(x)
               rowSums(x, na.rm = TRUE) * (NA^(rowSums(is.na(x)) == NCOL(x)))))
#   ID X201701 X201702 X201703
#1 101       1      NA      NA
#2 102       1       3       1
#3 203       1       1      NA
#4 207       5      NA      NA
#5 209       2       1      NA