R-从长度不完全相同的数据列表创建数据帧

R-从长度不完全相同的数据列表创建数据帧,r,list,dataframe,R,List,Dataframe,我有一个数据帧列表,我想将其转换为单个数据帧。 以下是我的数据子集: date <- c("4/10/2016","4/11/2016","4/12/2016") time <- c("1:00:00 AM","1:15:00 AM", "1:30:00 AM") temp <- c(86.1,85.3,85.7) humidity <- c(39.9,39.5,39.1) object <- data.frame(date,

我有一个数据帧列表,我想将其转换为单个数据帧。 以下是我的数据子集:

    # Sensor 1: three readings.
    date     <- c("4/10/2016", "4/11/2016", "4/12/2016")
    time     <- c("1:00:00 AM", "1:15:00 AM", "1:30:00 AM")
    temp     <- c(86.1, 85.3, 85.7)
    humidity <- c(39.9, 39.5, 39.1)
    object   <- data.frame(date = date, time = time,
                           temp = temp, humidity = humidity)

    # Sensor 2: four readings, stored under "2"-suffixed column names.
    date2     <- c("4/10/2016", "4/11/2016", "4/12/2016", "4/13/2016")
    time2     <- c("1:00:00 AM", "1:15:00 AM", "1:30:00 AM", "1:45:00 AM")
    temp2     <- c(86.1, 85.3, 85.7, 86.2)
    humidity2 <- c(39.9, 39.5, 39.1, 40.0)
    object2   <- data.frame(date2 = date2, time2 = time2,
                            temp2 = temp2, humidity2 = humidity2)

    # The list of data frames (3 rows and 4 rows) that should become one.
    data1 <- list(object, object2)
因为这些数据帧的行数不同,我没能成功地把它们合并到一个数据帧中。我尝试过用NA填充数据帧,但没有成功,最后只是添加了全是NA的新列。我是编程新手,所以任何解释都会有帮助。如果我可以做些什么来改进我的问题,请告诉我。

更新: 这是我的数据,其中一个传感器在其他传感器启动后启动

    # Sensor 1: three readings.
    date     <- c("4/10/2016", "4/11/2016", "4/12/2016")
    time     <- c("1:00:00 AM", "1:15:00 AM", "1:30:00 AM")
    temp     <- c(86.1, 85.3, 85.7)
    humidity <- c(39.9, 39.5, 39.1)
    object   <- data.frame(date = date, time = time,
                           temp = temp, humidity = humidity)

    # Sensor 2: four readings; shares the date/time column names with sensor 1.
    date      <- c("4/10/2016", "4/11/2016", "4/12/2016", "4/13/2016")
    time      <- c("1:00:00 AM", "1:15:00 AM", "1:30:00 AM", "1:45:00 AM")
    temp2     <- c(86.1, 85.3, 85.7, 86.2)
    humidity2 <- c(39.9, 39.5, 39.1, 40.0)
    object2   <- data.frame(date = date, time = time,
                            temp2 = temp2, humidity2 = humidity2)

    # Sensor 3: same four timestamps, but its first reading is missing (NA).
    date      <- c("4/10/2016", "4/11/2016", "4/12/2016", "4/13/2016")
    time      <- c("1:00:00 AM", "1:15:00 AM", "1:30:00 AM", "1:45:00 AM")
    temp3     <- c(NA, 84.3, 84.7, 85.2)
    humidity3 <- c(NA, 40.1, 39.7, 40.5)
    object3   <- data.frame(date = date, time = time,
                            temp3 = temp3, humidity3 = humidity3)

    # All three sensors collected into one list.
    data1 <- list(object, object2, object3)

要用
rbind
把列表元素按行合并在一起,首先需要使每个列表元素的列名称相同

# Give the second list element the same column names as the first one.
names(data1[[2]]) <- names(data1[[1]])
package
data.table
有一个
rbindlist
函数,用于rbinds列表元素。由于列名不同,您可以指定
use.names=T
&
fill=T
,以便在列名不匹配的地方填充NAs。或者,保留
use.names=F
,在此情况下,它将按列的顺序进行合并,并假定各列的含义相同

library(data.table)

# Default call: the two data frames are stacked column by column, and the
# result carries the column names of the first list element.
rbindlist(data1)
#         date       time temp humidity
# 1: 4/10/2016 1:00:00 AM 86.1     39.9
# 2: 4/11/2016 1:15:00 AM 85.3     39.5
# 3: 4/12/2016 1:30:00 AM 85.7     39.1
# 4: 4/10/2016 1:00:00 AM 86.1     39.9
# 5: 4/11/2016 1:15:00 AM 85.3     39.5
# 6: 4/12/2016 1:30:00 AM 85.7     39.1
# 7: 4/13/2016 1:45:00 AM 86.2     40.0

# Match columns by name instead; a column missing from an element is filled
# with NA. TRUE is spelled out because T is an ordinary, reassignable variable.
rbindlist(data1, use.names = TRUE, fill = TRUE)
#               date       time temp humidity     date2      time2 temp2 humidity2
# 1: 4/10/2016 1:00:00 AM 86.1     39.9        NA         NA    NA        NA
# 2: 4/11/2016 1:15:00 AM 85.3     39.5        NA         NA    NA        NA
# 3: 4/12/2016 1:30:00 AM 85.7     39.1        NA         NA    NA        NA
# 4:        NA         NA   NA       NA 4/10/2016 1:00:00 AM  86.1      39.9
# 5:        NA         NA   NA       NA 4/11/2016 1:15:00 AM  85.3      39.5
# 6:        NA         NA   NA       NA 4/12/2016 1:30:00 AM  85.7      39.1
# 7:        NA         NA   NA       NA 4/13/2016 1:45:00 AM  86.2      40.0
如果您希望执行
SQL
类型的联接,则使用
merge
(在base R中)

更新

根据您的编辑,您要找的是右连接(right join),因此使用
merge
all.y=T

    # Right join on the date column: all.y = TRUE keeps every row of the second
    # data frame, and rows without a match in the first one are filled with NA.
    # TRUE is spelled out because T is an ordinary, reassignable variable.
    merge(data1[[1]], data1[[2]], by.x = c("date"), by.y = c("date2"), all.y = TRUE)
#         date       time temp humidity      time2 temp2 humidity2
# 1 4/10/2016 1:00:00 AM 86.1     39.9 1:00:00 AM  86.1      39.9
# 2 4/11/2016 1:15:00 AM 85.3     39.5 1:15:00 AM  85.3      39.5
# 3 4/12/2016 1:30:00 AM 85.7     39.1 1:30:00 AM  85.7      39.1
# 4 4/13/2016       <NA>   NA       NA 1:45:00 AM  86.2      40.0
# Right join on the date column: all.y = TRUE keeps every row of the second
# data frame, and rows without a match in the first one are filled with NA.
# Column names must be the real ones ("date", "date2") in plain ASCII quotes.
merge(data1[[1]], data1[[2]], by.x = c("date"), by.y = c("date2"), all.y = TRUE)
#        date       time temp humidity      time2 temp2 humidity2
# 1 4/10/2016 1:00:00 AM 86.1     39.9 1:00:00 AM  86.1      39.9
# 2 4/11/2016 1:15:00 AM 85.3     39.5 1:15:00 AM  85.3      39.5
# 3 4/12/2016 1:30:00 AM 85.7     39.1 1:30:00 AM  85.7      39.1
# 4 4/13/2016       <NA>   NA       NA 1:45:00 AM  86.2      40.0

我认为
rbind
是您要找的。还是
merge
?结果不清楚您想要什么,可能是
merge(object,object2,by.x=c("date","time"),by.y=c("date2","time2"),all=TRUE)
rbind正在将其转换为矩阵。我接着使用了as.data.frame,但它将我的数据弄得乱七八糟。@thelatemail:事实上,我的列表中有20个数据帧,这使得合并无法使用(据我所知)。我想要一个带有日期和时间的单个数据框,后跟每个“数据框”中的温度和湿度。使用
dplyr
purrr
data1 %>% map(setNames, names(data1[[1]])) %>% bind_rows() %>% distinct()
或基础等价物
unique(do.call(rbind.data.frame, lapply(data1, setNames, names(data1[[1]]))))
…虽然两者都对你想要的东西做了一些假设。关于
rbindlist
,我想
dcast(rbindlist(data1,idcol=TRUE),date+time~.id,value.var=c("temp","humidity"))
看起来更标准。@symbolxau感谢更新。事实上,我的数据有更多的数据帧(我可能应该在前面说),这使得合并无法使用。
# Row-bind every element of the list in one call.
# NOTE(review): the output below shows only the first element's column names,
# so this presumably runs after the list elements were given identical names.
do.call(rbind, data1)
#       date       time temp humidity
# 1 4/10/2016 1:00:00 AM 86.1     39.9
# 2 4/11/2016 1:15:00 AM 85.3     39.5
# 3 4/12/2016 1:30:00 AM 85.7     39.1
# 4 4/10/2016 1:00:00 AM 86.1     39.9
# 5 4/11/2016 1:15:00 AM 85.3     39.5
# 6 4/12/2016 1:30:00 AM 85.7     39.1
# 7 4/13/2016 1:45:00 AM 86.2     40.0
library(data.table)

# Default call: the two data frames are stacked column by column, and the
# result carries the column names of the first list element.
rbindlist(data1)
#         date       time temp humidity
# 1: 4/10/2016 1:00:00 AM 86.1     39.9
# 2: 4/11/2016 1:15:00 AM 85.3     39.5
# 3: 4/12/2016 1:30:00 AM 85.7     39.1
# 4: 4/10/2016 1:00:00 AM 86.1     39.9
# 5: 4/11/2016 1:15:00 AM 85.3     39.5
# 6: 4/12/2016 1:30:00 AM 85.7     39.1
# 7: 4/13/2016 1:45:00 AM 86.2     40.0

# Match columns by name instead; a column missing from an element is filled
# with NA. TRUE is spelled out because T is an ordinary, reassignable variable.
rbindlist(data1, use.names = TRUE, fill = TRUE)
#               date       time temp humidity     date2      time2 temp2 humidity2
# 1: 4/10/2016 1:00:00 AM 86.1     39.9        NA         NA    NA        NA
# 2: 4/11/2016 1:15:00 AM 85.3     39.5        NA         NA    NA        NA
# 3: 4/12/2016 1:30:00 AM 85.7     39.1        NA         NA    NA        NA
# 4:        NA         NA   NA       NA 4/10/2016 1:00:00 AM  86.1      39.9
# 5:        NA         NA   NA       NA 4/11/2016 1:15:00 AM  85.3      39.5
# 6:        NA         NA   NA       NA 4/12/2016 1:30:00 AM  85.7      39.1
# 7:        NA         NA   NA       NA 4/13/2016 1:45:00 AM  86.2      40.0
# Full outer join keyed on all four columns: all = TRUE keeps the rows of both
# data frames, and rows that agree on every key column appear only once.
# TRUE is spelled out because T is an ordinary, reassignable variable.
merge(data1[[1]], data1[[2]], by.x = c("date", "time", "temp", "humidity"),
                              by.y = c("date2", "time2", "temp2", "humidity2"), all = TRUE)

#        date       time temp humidity
# 1 4/10/2016 1:00:00 AM 86.1     39.9
# 2 4/11/2016 1:15:00 AM 85.3     39.5
# 3 4/12/2016 1:30:00 AM 85.7     39.1
# 4 4/13/2016 1:45:00 AM 86.2     40.0
    # Right join on the date column: all.y = TRUE keeps every row of the second
    # data frame, and rows without a match in the first one are filled with NA.
    # TRUE is spelled out because T is an ordinary, reassignable variable.
    merge(data1[[1]], data1[[2]], by.x = c("date"), by.y = c("date2"), all.y = TRUE)
#         date       time temp humidity      time2 temp2 humidity2
# 1 4/10/2016 1:00:00 AM 86.1     39.9 1:00:00 AM  86.1      39.9
# 2 4/11/2016 1:15:00 AM 85.3     39.5 1:15:00 AM  85.3      39.5
# 3 4/12/2016 1:30:00 AM 85.7     39.1 1:30:00 AM  85.7      39.1
# 4 4/13/2016       <NA>   NA       NA 1:45:00 AM  86.2      40.0