R总开/关时间
我有一个数据集,包含时间戳和流量计的流量数据(加仑/分钟),每 2 分钟记录一次数据。我想得到每一段开启时段的开始(on)和结束(off)时间,以及该时段内的平均流量。示例数据如下:
# Sample flow-meter log: 15 readings taken every 2 minutes (120 s).
# `dt` holds the reading times as POSIXct (seconds since the epoch, shown in
# the session's local time zone); `gpm` holds the flow in gallons per minute.
df <- structure(
  list(
    # First reading at epoch second 1519891200, then one every 120 seconds.
    dt = .POSIXct(1519891200 + 120 * (0:14), tz = ""),
    # Off (3 readings), 50 gpm (5), off (2), 80 gpm (3), off (2).
    gpm = rep(c(0, 50, 0, 80, 0), times = c(3L, 5L, 2L, 3L, 2L))
  ),
  row.names = c(NA, 15L),
  class = "data.frame"
)
# dt gpm
# 1 2018-03-01 03:00:00 0
# 2 2018-03-01 03:02:00 0
# 3 2018-03-01 03:04:00 0
# 4 2018-03-01 03:06:00 50
# 5 2018-03-01 03:08:00 50
# 6 2018-03-01 03:10:00 50
# 7 2018-03-01 03:12:00 50
# 8 2018-03-01 03:14:00 50
# 9 2018-03-01 03:16:00 0
# 10 2018-03-01 03:18:00 0
# 11 2018-03-01 03:20:00 80
# 12 2018-03-01 03:22:00 80
# 13 2018-03-01 03:24:00 80
# 14 2018-03-01 03:26:00 0
# 15 2018-03-01 03:28:00 0
在 dplyr 中使用 cumsum:
library(dplyr)
# Collapse the readings into one row per "on" period (a contiguous run of
# non-zero flow), giving its first timestamp (`on`), last timestamp (`off`)
# and mean flow (`gpm`).
df %>%
  mutate(
    # Missing readings are treated as "off" so they cannot poison cumsum().
    is_on = !is.na(gpm) & gpm != 0,
    # Start a new id at every off -> on transition. This keeps two on-periods
    # apart even when only a single zero reading separates them.
    id = cumsum(is_on & !dplyr::lag(is_on, default = FALSE))
  ) %>%
  filter(is_on) %>%
  group_by(id) %>%
  summarise(on = min(dt), off = max(dt), gpm = mean(gpm)) %>%
  select(-id)
或者在 data.table 中使用 rle 和 cumsum:
library(data.table)
# Summarise each contiguous run of non-zero flow as one on/off interval.
# setDT() converts `df` to a data.table by reference and `:=` adds an `id`
# column to it: 0 for "off" rows, 1, 2, ... for successive "on" periods.
setDT(df)[, id := {
  # Missing readings count as "off".
  is_on <- !is.na(gpm) & gpm != 0
  # A period starts where the flow is on and the previous reading was off.
  # data.table::shift() is used instead of a bare lag(), whose meaning depends
  # on whether dplyr is attached (stats::lag does not shift a plain vector).
  cumsum(is_on & !shift(is_on, fill = FALSE)) * is_on
}][
  id != 0L,
  list(on = min(dt), off = max(dt), gpm = mean(gpm)),
  by = .(id)
][, id := NULL][]
library(data.table)
# Summarise each contiguous run of non-zero flow as one on/off interval.
# setDT() converts `df` to a data.table by reference and `:=` adds (or
# overwrites) an `id` column: 0 for "off" rows, 1, 2, ... for successive
# "on" periods.
setDT(df)[, id := {
  # Missing readings count as "off".
  is_on <- !is.na(gpm) & gpm != 0
  # A period starts where the flow is on and the previous reading was off.
  # data.table::shift() is used instead of a bare lag(), whose meaning depends
  # on whether dplyr is attached (stats::lag does not shift a plain vector).
  cumsum(is_on & !shift(is_on, fill = FALSE)) * is_on
}][
  id != 0L,
  list(on = min(dt), off = max(dt), gpm = mean(gpm)),
  by = .(id)
][, id := NULL][]
# on off gpm
# 1 2018-03-01 03:06:00 2018-03-01 03:14:00 50
# 2 2018-03-01 03:20:00 2018-03-01 03:24:00 80