如何在R中连接后缀为.x、.y的列

如何在R中连接后缀为.x、.y的列,r,dataframe,R,Dataframe,我必须创建一个数据框,其中包含特定日期期间传感器列表中的数据: DATE SENSOR1 SENSOR2 SENSOR3 SENSOR4 2020-04-20 00:00:00 1015 19.88 95.80 9.020 2020-04-20 00:10:00 1015 19.84 96.10 8.970 2020-04-20 00:20:00 1015 19.84 96.40 9.010 2020-04-20 00

我必须创建一个数据框,其中包含特定日期期间传感器列表中的数据:

DATE                SENSOR1 SENSOR2 SENSOR3 SENSOR4
2020-04-20 00:00:00 1015    19.88   95.80   9.020 
2020-04-20 00:10:00 1015    19.84   96.10   8.970 
2020-04-20 00:20:00 1015    19.84   96.40   9.010 
2020-04-20 00:30:00 1015    19.81   96.60   9.210
2020-04-20 00:40:00 1015    19.79   96.80   9.700 
2020-04-20 00:50:00 1015    19.81   97.00   8.870
最初,我创建了一个包含1列的数据框(DATE:包含指定日期之间所有日期的行,间隔10分钟)。通常它可以有数千行,但为了重现一个示例,我们可以保持简单:

# Reproducible example: one DATE row every 10 minutes from 00:00 to 00:50.
periods <- data.frame(
  DATE = sprintf("2020-04-20 00:%02d:00", seq(0, 50, by = 10))
)

有没有关于如何正确合并或在生成数据帧后修复它的想法?

您可以使用 tidyr 中的 `pivot_longer` 将所有内容放在一列中(长格式),用 `rbind` 把两个长格式数据框按行合并,然后用 `pivot_wider` 将所有内容放回宽格式中。在转回宽格式之前,您还需要使用 `na.omit()` 删除 NA。

library(tidyr)
periods %>%
  pivot_longer(-DATE) %>%
  rbind(sensor %>%
          pivot_longer(-DATE)) %>%
  na.omit() %>%
  pivot_wider(names_from = name, values_from = value)
Joining, by = c("DATE", "name", "value")
# A tibble: 6 x 5
  DATE                SENSOR1 SENSOR2 SENSOR3 SENSOR4
1 2020-04-20 00:00:00    1015    19.9    95.8    9.02
2 2020-04-20 00:10:00    1015    19.8    96.1    8.97
3 2020-04-20 00:20:00    1015    19.8    96.4    9.01
4 2020-04-20 00:30:00    1010    NA      NA      NA
5 2020-04-20 00:40:00    1010    NA      NA      NA
6 2020-04-20 00:50:00    1010    NA      NA      NA
数据

# Example data: `periods` has SENSOR1..SENSOR4 readings for the first three
# timestamps only; `sensor` supplies SENSOR1 readings for the last three.
periods <- data.frame(
  DATE = sprintf("2020-04-20 00:%02d:00", seq(0, 50, by = 10)),
  SENSOR1 = c(1015, 1015, 1015, NA, NA, NA),
  SENSOR2 = c(19.88, 19.84, 19.84, NA, NA, NA),
  SENSOR3 = c(95.80, 96.10, 96.40, NA, NA, NA),
  SENSOR4 = c(9.020, 8.970, 9.010, NA, NA, NA)
)
sensor <- data.frame(
  DATE = sprintf("2020-04-20 00:%02d:00", seq(0, 50, by = 10)),
  SENSOR1 = c(NA, NA, NA, 1010, 1010, 1010)
)

合并到 periods 的变量列名是重复的(因此 merge 会给它们加上 .x、.y 后缀),但我需要用正确的测量值覆盖其中的 NA。
# For every sensor listed in `sensors`, fetch its measurements for the fixed
# date range and merge them into `periods` as a column named after the sensor.
# seq_along() is used instead of 1:length(): when `sensors` has no rows,
# 1:length(sensors$ID) would iterate over c(1, 0) and issue malformed queries.
for (i in seq_along(sensors$ID)) {
  sensor <- dbGetQuery(
    con,
    paste0(
      "SELECT DATE, VALUE FROM MEASURES WHERE DATE between '2020-04-20 00:00:00' and '2020-04-20 00:50:00' AND ID= ",
      sensors$ID[i],
      " ORDER BY DATE ASC"
    )
  )
  # getting rid of milliseconds
  sensor$DATE <- as.character(round_date(sensor$DATE, "minute"))
  # Renaming the column with sensor's name
  names(sensor) <- c("DATE", sensors$SENSORNAME[i])

  # Full outer join on DATE. NOTE: if `periods` already has a column with this
  # sensor's name, merge() keeps both and disambiguates them with .x/.y
  # suffixes instead of filling the NAs.
  periods <- merge(periods, sensor, by = "DATE", all = TRUE)

  rm(sensor)
}
# Example data: `periods` has SENSOR1..SENSOR4 readings for the first three
# timestamps only; `sensor` supplies SENSOR1 readings for the last three.
periods <- data.frame(
  DATE = sprintf("2020-04-20 00:%02d:00", seq(0, 50, by = 10)),
  SENSOR1 = c(1015, 1015, 1015, NA, NA, NA),
  SENSOR2 = c(19.88, 19.84, 19.84, NA, NA, NA),
  SENSOR3 = c(95.80, 96.10, 96.40, NA, NA, NA),
  SENSOR4 = c(9.020, 8.970, 9.010, NA, NA, NA)
)
sensor <- data.frame(
  DATE = sprintf("2020-04-20 00:%02d:00", seq(0, 50, by = 10)),
  SENSOR1 = c(NA, NA, NA, 1010, 1010, 1010)
)
DATE                SENSOR1.x SENSOR2.x SENSOR3.x SENSOR4.x SENSOR1.y SENSOR2.y SENSOR3.y SENSOR4.y
2020-04-20 00:00:00  1015      19.88     95.80     9.020      NA        NA        NA        NA
2020-04-20 00:10:00  1015      19.84     96.10     8.970      NA        NA        NA        NA 
2020-04-20 00:20:00  1015      19.84     96.40     9.010      NA        NA        NA        NA 
2020-04-20 00:30:00   NA        NA        NA        NA       1015      19.81     96.60     9.210
2020-04-20 00:40:00   NA        NA        NA        NA       1015      19.79     96.80     9.700 
2020-04-20 00:50:00   NA        NA        NA        NA       1015      19.81     97.00     8.870
library(tidyr)
# Stack both tables in long form (one row per DATE/sensor reading), drop the
# rows whose value is NA, then spread the sensors back into one column each.
rbind(
  pivot_longer(periods, -DATE),
  pivot_longer(sensor, -DATE)
) %>%
  na.omit() %>%
  pivot_wider(names_from = name, values_from = value)

Joining, by = c("DATE", "name", "value")
# A tibble: 6 x 5
  DATE                SENSOR1 SENSOR2 SENSOR3 SENSOR4
  <fct>                 <dbl>   <dbl>   <dbl>   <dbl>
1 2020-04-20 00:00:00    1015    19.9    95.8    9.02
2 2020-04-20 00:10:00    1015    19.8    96.1    8.97
3 2020-04-20 00:20:00    1015    19.8    96.4    9.01
4 2020-04-20 00:30:00    1010    NA      NA     NA   
5 2020-04-20 00:40:00    1010    NA      NA     NA   
6 2020-04-20 00:50:00    1010    NA      NA     NA 
# Example data: `periods` has SENSOR1..SENSOR4 readings for the first three
# timestamps only; `sensor` supplies SENSOR1 readings for the last three.
periods <- data.frame(
  DATE = sprintf("2020-04-20 00:%02d:00", seq(0, 50, by = 10)),
  SENSOR1 = c(1015, 1015, 1015, NA, NA, NA),
  SENSOR2 = c(19.88, 19.84, 19.84, NA, NA, NA),
  SENSOR3 = c(95.80, 96.10, 96.40, NA, NA, NA),
  SENSOR4 = c(9.020, 8.970, 9.010, NA, NA, NA)
)
sensor <- data.frame(
  DATE = sprintf("2020-04-20 00:%02d:00", seq(0, 50, by = 10)),
  SENSOR1 = c(NA, NA, NA, 1010, 1010, 1010)
)