基于R中两个不同组的两行条件差

基于R中两个不同组的两行条件差,r,dplyr,data.table,R,Dplyr,Data.table,以下数据集包含每小时累积降雨量。我想通过计算连续两小时的降雨量差来计算每小时的降雨量 问题在于每天09:00时开始测量降雨量。因此,我想开始寻找两行连续的 hr_rain 列之间的差异,从2018年2月6日的09小时到2018年3月6日的08小时,第二天同样如此 我不能简单地从09小时开始,然后再看接下来的24条记录,因为在某些日子里, hr_rain 的一些观测值丢失了。 样本数据如下: STATION CODE DATE HOUR hr_rain SHIVAMOGGA 1

以下数据集包含每小时累积降雨量。我想通过计算连续两小时的降雨量差来计算每小时的降雨量

问题在于每天09:00时开始测量降雨量。因此,我想开始寻找两行连续的
hr_rain
列之间的差异,从2018年2月6日的09小时到2018年3月6日的08小时,第二天同样如此

我不能简单地从09小时开始,然后再看接下来的24条记录,因为在某些日子里,
hr_rain
的一些观测值丢失了。
样本数据如下:

 STATION    CODE  DATE     HOUR hr_rain
SHIVAMOGGA  163 06/09/18    00  1.0
SHIVAMOGGA  163 06/09/18    04  1.0
SHIVAMOGGA  163 06/09/18    05  1.0
SHIVAMOGGA  163 06/09/18    06  1.5
SHIVAMOGGA  163 06/09/18    07  2.5
SHIVAMOGGA  163 06/09/18    08  2.5
SHIVAMOGGA  163 06/09/18    09  0.0
SHIVAMOGGA  163 06/09/18    10  0.5
SHIVAMOGGA  163 06/09/18    11  0.5
SHIVAMOGGA  163 06/09/18    12  0.5
SHIVAMOGGA  163 06/09/18    13  0.5
SHIVAMOGGA  163 06/09/18    14  0.5
SHIVAMOGGA  163 06/09/18    15  0.5
SHIVAMOGGA  163 06/09/18    16  0.5
SHIVAMOGGA  163 06/09/18    17  0.5
SHIVAMOGGA  163 06/09/18    18  0.5
SHIVAMOGGA  163 06/09/18    19  0.5
SHIVAMOGGA  163 06/10/18    03  0.5
SHIVAMOGGA  163 06/10/18    05  0.5
SHIVAMOGGA  163 06/10/18    06  0.5
SHIVAMOGGA  163 06/10/18    07  0.5
SHIVAMOGGA  163 06/10/18    08  0.5
SHIVAMOGGA  163 06/10/18    09  0.0
SHIVAMOGGA  163 06/10/18    10  0.0
我试过了

# First attempt: difference between consecutive cumulative readings,
# restarting within each calendar DATE and using 0 as the value that
# precedes the first row of every group.
df %>%
  group_by(DATE) %>%
  mutate(RAINFALL = hr_rain - lag(hr_rain, default = 0))
但它会根据日期进行分组,给出从每天0小时到23小时的结果,然后在第二天重新开始。但我想从每天09点开始,一直到第二天8点。 我想要的是:

STATION    CODE  DATE     HOUR hr_rain rainfall

SHIVAMOGGA  163 06/09/18    00  1.0    1
SHIVAMOGGA  163 06/09/18    04  1.0    0 
SHIVAMOGGA  163 06/09/18    05  1.0    0
SHIVAMOGGA  163 06/09/18    06  1.5    0.5 
SHIVAMOGGA  163 06/09/18    07  2.5    1 
SHIVAMOGGA  163 06/09/18    08  2.5    0
SHIVAMOGGA  163 06/09/18    09  0.0    0
SHIVAMOGGA  163 06/09/18    10  0.5    0.5 
SHIVAMOGGA  163 06/09/18    11  0.5    0
SHIVAMOGGA  163 06/09/18    12  0.5    0
SHIVAMOGGA  163 06/09/18    13  0.5    0
SHIVAMOGGA  163 06/09/18    14  0.5    0
SHIVAMOGGA  163 06/09/18    15  0.5    0
SHIVAMOGGA  163 06/09/18    16  0.5    0
SHIVAMOGGA  163 06/09/18    17  0.5    0
SHIVAMOGGA  163 06/09/18    18  0.5    0
SHIVAMOGGA  163 06/09/18    19  0.5    0
SHIVAMOGGA  163 06/10/18    03  0.5    0
SHIVAMOGGA  163 06/10/18    05  0.5    0
SHIVAMOGGA  163 06/10/18    06  0.5    0
SHIVAMOGGA  163 06/10/18    07  0.5    0
SHIVAMOGGA  163 06/10/18    08  0.5    0
SHIVAMOGGA  163 06/10/18    09  0.0    0
SHIVAMOGGA  163 06/10/18    10  0.0    0

作为R的初学者,我可能确实错过了一些简单的东西。如果您能了解如何在R中执行此操作,我将不胜感激。

您需要在
HOUR==9时创建组

library(dplyr)

# Derive hourly rainfall from the cumulative reading, which restarts at the
# 09:00 row of each day. Rows must already be in chronological order within
# each station, because cumsum() and lag() both work in row order.
df %>%
  # `gr` increases by one at every HOUR == 9 row, so each group runs from a
  # 09:00 row up to the last row before the next 09:00 row. STATION and CODE
  # keep readings from different gauges in separate groups.
  group_by(STATION, CODE, gr = cumsum(HOUR == 9)) %>%
  # Qualify with dplyr:: because plyr::mutate() masks dplyr::mutate() when
  # plyr is attached after dplyr, and plyr's version ignores the grouping.
  dplyr::mutate(RAINFALL = hr_rain - dplyr::lag(hr_rain, default = 0)) %>%
  # Hand back an ungrouped frame without the helper column.
  ungroup() %>%
  dplyr::select(-gr)

#  STATION     CODE DATE      HOUR hr_rain RAINFALL
#   <fct>      <int> <fct>    <int>   <dbl>    <dbl>
# 1 SHIVAMOGGA   163 06/09/18     0     1        1  
# 2 SHIVAMOGGA   163 06/09/18     4     1        0  
# 3 SHIVAMOGGA   163 06/09/18     5     1        0  
# 4 SHIVAMOGGA   163 06/09/18     6     1.5      0.5
# 5 SHIVAMOGGA   163 06/09/18     7     2.5      1  
# 6 SHIVAMOGGA   163 06/09/18     8     2.5      0  
# 7 SHIVAMOGGA   163 06/09/18     9     0        0  
# 8 SHIVAMOGGA   163 06/09/18    10     0.5      0.5
# 9 SHIVAMOGGA   163 06/09/18    11     0.5      0  
#10 SHIVAMOGGA   163 06/09/18    12     0.5      0  
# … with 14 more rows
库(dplyr)
df%>%
分组依据(gr=cumsum(小时==9))%>%
#也许你还需要添加更多的组
#分组依据(车站、代码、gr=cumsum(小时==9))%>%
突变(雨量=hr_雨-滞后(hr_雨,默认值=0))
#车站代码日期小时小时雨量
#                     
#1 SHIVAMOGGA 16306/09/2018 01 1
#2 SHIVAMOGGA 16306/09/18 4 1 0
#3 SHIVAMOGGA 163 06/09/18 5 1 0
#4 SHIVAMOGGA 16306/09/18 6 1.50.5
#5 SHIVAMOGGA 163 06/09/18 7 2.5 1
#6 SHIVAMOGGA 163 06/09/18 8 2.50
#7 SHIVAMOGGA 163 06/09/18 9 0 0 0
#8 SHIVAMOGGA 16306/09/1810.50.5
#9 SHIVAMOGGA 16306/09/11810.50
#10 SHIVAMOGGA 16306/09/18120.50
#…还有14行
数据

# Sample data: 24 cumulative-rainfall readings for one station across two
# dates (17 rows on the first date, 7 on the second), with some hours absent.
df <- data.frame(
  STATION = factor(rep("SHIVAMOGGA", 24L)),
  CODE = rep(163L, 24L),
  DATE = factor(rep(c("06/09/18", "06/10/18"), times = c(17L, 7L))),
  HOUR = c(0L, 4:19, 3L, 5:10),
  hr_rain = c(1, 1, 1, 1.5, 2.5, 2.5, 0, rep(0.5, 15L), 0, 0)
)

df当
HOUR==9

library(dplyr)

# Derive hourly rainfall from the cumulative reading, which restarts at the
# 09:00 row of each day. Rows must already be in chronological order within
# each station, because cumsum() and lag() both work in row order.
df %>%
  # `gr` increases by one at every HOUR == 9 row, so each group runs from a
  # 09:00 row up to the last row before the next 09:00 row. STATION and CODE
  # keep readings from different gauges in separate groups.
  group_by(STATION, CODE, gr = cumsum(HOUR == 9)) %>%
  # Qualify with dplyr:: because plyr::mutate() masks dplyr::mutate() when
  # plyr is attached after dplyr, and plyr's version ignores the grouping.
  dplyr::mutate(RAINFALL = hr_rain - dplyr::lag(hr_rain, default = 0)) %>%
  # Hand back an ungrouped frame without the helper column.
  ungroup() %>%
  dplyr::select(-gr)

#  STATION     CODE DATE      HOUR hr_rain RAINFALL
#   <fct>      <int> <fct>    <int>   <dbl>    <dbl>
# 1 SHIVAMOGGA   163 06/09/18     0     1        1  
# 2 SHIVAMOGGA   163 06/09/18     4     1        0  
# 3 SHIVAMOGGA   163 06/09/18     5     1        0  
# 4 SHIVAMOGGA   163 06/09/18     6     1.5      0.5
# 5 SHIVAMOGGA   163 06/09/18     7     2.5      1  
# 6 SHIVAMOGGA   163 06/09/18     8     2.5      0  
# 7 SHIVAMOGGA   163 06/09/18     9     0        0  
# 8 SHIVAMOGGA   163 06/09/18    10     0.5      0.5
# 9 SHIVAMOGGA   163 06/09/18    11     0.5      0  
#10 SHIVAMOGGA   163 06/09/18    12     0.5      0  
# … with 14 more rows
库(dplyr)
df%>%
分组依据(gr=cumsum(小时==9))%>%
#也许你还需要添加更多的组
#分组依据(车站、代码、gr=cumsum(小时==9))%>%
突变(雨量=hr_雨-滞后(hr_雨,默认值=0))
#车站代码日期小时小时雨量
#                     
#1 SHIVAMOGGA 16306/09/2018 01 1
#2 SHIVAMOGGA 16306/09/18 4 1 0
#3 SHIVAMOGGA 163 06/09/18 5 1 0
#4 SHIVAMOGGA 16306/09/18 6 1.50.5
#5 SHIVAMOGGA 163 06/09/18 7 2.5 1
#6 SHIVAMOGGA 163 06/09/18 8 2.50
#7 SHIVAMOGGA 163 06/09/18 9 0 0 0
#8 SHIVAMOGGA 16306/09/1810.50.5
#9 SHIVAMOGGA 16306/09/11810.50
#10 SHIVAMOGGA 16306/09/18120.50
#…还有14行
数据

# Sample data: 24 cumulative-rainfall readings for one station across two
# dates (17 rows on the first date, 7 on the second), with some hours absent.
df <- data.frame(
  STATION = factor(rep("SHIVAMOGGA", 24L)),
  CODE = rep(163L, 24L),
  DATE = factor(rep(c("06/09/18", "06/10/18"), times = c(17L, 7L))),
  HOUR = c(0L, 4:19, 3L, 5:10),
  hr_rain = c(1, 1, 1, 1.5, 2.5, 2.5, 0, rep(0.5, 15L), 0, 0)
)

我按 HOUR == 9 分组了,但在 09 时这一行,得到的结果是 09 时降雨量减去 08 时降雨量(即这里得到的不是 0,而是 -2.5)。 @Ajay 您的实际数据框是否与此处共享的数据不同?因为我认为使用此处共享的数据可以得到预期的输出。 我的数据框还多两列,分别是 DISTRICT 和 TALUK。正如我在前面的评论中所说,我得到的是 09 时降雨量与 08 时降雨量的差值(即 -2.5)。 @Ajay 好的,这可能是因为您的环境中加载了
plyr
,它屏蔽了 dplyr 的
mutate
,请尝试
df %>% group_by(gr = cumsum(HOUR == 9)) %>% dplyr::mutate(rain = hr_rain - lag(hr_rain, default = 0))
我认为您无法在评论中正确粘贴
dput
的输出,最好将其添加到您的帖子中。您的
HOUR
列是整数还是字符?如果是字符,您可以尝试将
group_by
语句更改为
group_by(gr = cumsum(HOUR == "09"))
我按 HOUR == 9 分组了,但在 09 时这一行,得到的结果是 09 时降雨量减去 08 时降雨量(即这里得到的不是 0,而是 -2.5)。 @Ajay 您的实际数据框是否与此处共享的数据不同?因为我认为使用此处共享的数据可以得到预期的输出。 我的数据框还多两列,分别是 DISTRICT 和 TALUK。正如我在前面的评论中所说,我得到的是 09 时降雨量与 08 时降雨量的差值(即 -2.5)。 @Ajay 好的,这可能是因为您的环境中加载了
plyr
,它屏蔽了 dplyr 的
mutate
,请尝试
df %>% group_by(gr = cumsum(HOUR == 9)) %>% dplyr::mutate(rain = hr_rain - lag(hr_rain, default = 0))
我认为您无法在评论中正确粘贴
dput
的输出,最好将其添加到您的帖子中。您的
HOUR
列是整数还是字符?如果是字符,您可以尝试将
group_by
语句更改为
group_by(gr = cumsum(HOUR == "09"))