将递归列表转换为 data.table
我有一个递归列表,我想将其合并到一个将递归列表转换为data.R表,r,list,merge,data.table,tidyr,R,List,Merge,Data.table,Tidyr,我有一个递归列表,我想将其合并到一个数据表中。顶级列表(mylist)的每个元素都有两个元素: 时间戳它是一个字符向量 值这是一个列表 尽管结构不同(下图),但每个元素似乎都有class数据。table 下图显示了此列表的结构(代码可能太长): 现在,我运行两个循环来获得一个组合的数据 循环1将时间戳转换为R日期时间,并设置键 new_list <- lapply(1:length(mylist), function(n){ z <- mylist[[n]] c1 <
data.table 中。顶级列表(mylist)的每个元素都包含两列:
Timestamp(时间戳):一个字符向量
Value(值):一个列表
尽管结构不同(见下图),但每个元素的 class 似乎都是 data.table
下图显示了此列表的结构(代码可能太长):
现在,我运行两个循环来得到一个合并后的 data.table。
循环 1 将 Timestamp 列转换为 R 的日期时间(POSIXct),并设置键:
# Convert every element of `mylist` into a keyed two-column data.table:
# `time` (POSIXct, UTC) and a value column named after the list element.
# seq_along() is used instead of 1:length() so an empty list yields an
# empty result rather than iterating over c(1, 0).
new_list <- lapply(seq_along(mylist), function(n) {
  z <- mylist[[n]]
  # The format stops before the trailing "Z"; tz = 'UTC' supplies the zone.
  c1 <- as.POSIXct(z$Timestamp, format = '%Y-%m-%dT%H:%M:%S', tz = 'UTC')
  # `Value` is a list column; flatten it to a plain numeric vector.
  c2 <- as.numeric(unlist(z$Value))
  dt <- data.table(c1 = c1, c2 = c2)
  # setnames() renames by reference, the data.table-native replacement
  # for `colnames<-`.
  setnames(dt, c('time', names(mylist)[n]))
  setkey(dt, 'time')
  dt
})
这样可行,但不确定是否有更快的方法来得到我想要的表。
数据
dput(mylist)
list(A = structure(list(Timestamp = c("2019-06-01T00:00:00Z",
"2019-06-01T00:15:00Z", "2019-06-01T00:30:00Z", "2019-06-01T00:45:00Z",
"2019-06-01T01:00:00Z", "2019-06-01T01:15:00Z", "2019-06-01T01:30:00Z",
"2019-06-01T01:45:00Z", "2019-06-01T02:00:00Z", "2019-06-01T02:15:00Z"
), Value = list(100.050957, 100.080826, 100.120308, 100.053459,
100.053825, 100.04792, 100.0679, 100.088554, 100.102737,
100.103653)), row.names = c(NA, -10L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x7fe0a100a6e0>),
B = structure(list(Timestamp = c("2019-06-01T00:00:00Z",
"2019-06-01T00:15:00Z", "2019-06-01T00:30:00Z", "2019-06-01T00:45:00Z",
"2019-06-01T01:00:00Z", "2019-06-01T01:15:00Z", "2019-06-01T01:30:00Z",
"2019-06-01T01:45:00Z", "2019-06-01T02:00:00Z", "2019-06-01T02:15:00Z"
), Value = list(38.892395, 45.7738266, 53.21701, 57.08103,
62.1048546, 68.58914, 68.98703, 69.5170746, 71.49378,
78.59612)), row.names = c(NA, -10L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x7fe0a100a6e0>),
C = structure(list(Timestamp = c("2019-06-01T00:00:00Z",
"2019-06-01T00:15:00Z", "2019-06-01T00:30:00Z", "2019-06-01T00:45:00Z",
"2019-06-01T01:00:00Z", "2019-06-01T01:15:00Z", "2019-06-01T01:30:00Z",
"2019-06-01T01:45:00Z", "2019-06-01T02:00:00Z", "2019-06-01T02:15:00Z"
), Value = list(30.5898361, 29.75237, 27.63596, 26.5089836,
25.6826324, 24.909977, 24.4333439, 23.5524445, 23.1864853,
22.7402916)), row.names = c(NA, -10L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x7fe0a100a6e0>),
D = NULL, E = structure(list(Timestamp = c("2019-06-01T00:00:00Z",
"2019-06-01T00:15:00Z", "2019-06-01T00:30:00Z", "2019-06-01T00:45:00Z",
"2019-06-01T01:00:00Z", "2019-06-01T01:15:00Z", "2019-06-01T01:30:00Z",
"2019-06-01T01:45:00Z", "2019-06-01T02:00:00Z", "2019-06-01T02:15:00Z"
), Value = list(8.299942, 8.44268, 8.440144, 8.445086, 8.41551,
8.424382, 8.438655, 8.46398, 8.445853, 8.476906)), row.names = c(NA,
-10L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x7fe0a100a6e0>),
F = structure(list(Timestamp = c("2019-06-01T00:00:00Z",
"2019-06-01T00:15:00Z", "2019-06-01T00:30:00Z", "2019-06-01T00:45:00Z",
"2019-06-01T01:00:00Z", "2019-06-01T01:15:00Z", "2019-06-01T01:30:00Z",
"2019-06-01T01:45:00Z", "2019-06-01T02:00:00Z", "2019-06-01T02:15:00Z"
), Value = list(85.48002, 88.071, 87.71461, 86.2900848, 85.50101,
82.4923248, 81.78603, 82.4504547, 82.00605, 82.12493)), row.names = c(NA,
-10L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x7fe0a100a6e0>),
G = structure(list(Timestamp = c("2019-06-01T00:00:00Z",
"2019-06-01T00:15:00Z", "2019-06-01T00:30:00Z", "2019-06-01T00:45:00Z",
"2019-06-01T01:00:00Z", "2019-06-01T01:15:00Z", "2019-06-01T01:30:00Z",
"2019-06-01T01:45:00Z", "2019-06-01T02:00:00Z", "2019-06-01T02:15:00Z"
), Value = list(0.870313, 0.862552762, 0.8827777, 0.8639478,
0.849139452, 0.874981, 0.833493, 0.89307636, 0.8647241,
0.8711139)), row.names = c(NA, -10L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x7fe0a100a6e0>))
dput(mylist)
列表(A=结构(列表(时间戳=c)(“2019-06-01T00:00:00Z”),
“2019-06-01T00:15:00Z”、“2019-06-01T00:30:00Z”、“2019-06-01T00:45:00Z”,
“2019-06-01T01:00:00Z”、“2019-06-01T01:15:00Z”、“2019-06-01T01:30:00Z”,
“2019-06-01T01:45:00Z”、“2019-06-01T02:00:00Z”、“2019-06-01T02:15:00Z”
),值=列表(100.050957100.080826100.120308100.053459,
100.053825, 100.04792, 100.0679, 100.088554, 100.102737,
100.103653),row.names=c(NA,-10L),class=c(“data.table”,
“data.frame”),.internal.selfref=),
B=结构(列表(时间戳=c(“2019-06-01T00:00:00Z”),
“2019-06-01T00:15:00Z”、“2019-06-01T00:30:00Z”、“2019-06-01T00:45:00Z”,
“2019-06-01T01:00:00Z”、“2019-06-01T01:15:00Z”、“2019-06-01T01:30:00Z”,
“2019-06-01T01:45:00Z”、“2019-06-01T02:00:00Z”、“2019-06-01T02:15:00Z”
),Value=list(38.892395,45.7738266,53.21701,57.08103,
62.1048546, 68.58914, 68.98703, 69.5170746, 71.49378,
78.59612),row.names=c(NA,-10L),class=c(“data.table”,
“data.frame”),.internal.selfref=),
C=结构(列表(时间戳=C(“2019-06-01T00:00:00Z”),
“2019-06-01T00:15:00Z”、“2019-06-01T00:30:00Z”、“2019-06-01T00:45:00Z”,
“2019-06-01T01:00:00Z”、“2019-06-01T01:15:00Z”、“2019-06-01T01:30:00Z”,
“2019-06-01T01:45:00Z”、“2019-06-01T02:00:00Z”、“2019-06-01T02:15:00Z”
),Value=list(30.5898361,29.75237,27.63596,26.5089836,,
25.6826324, 24.909977, 24.4333439, 23.5524445, 23.1864853,
22.7402916),row.names=c(NA,-10L),class=c(“data.table”,
“data.frame”),.internal.selfref=),
D=空,E=结构(列表(时间戳=c(“2019-06-01T00:00:00Z”),
“2019-06-01T00:15:00Z”、“2019-06-01T00:30:00Z”、“2019-06-01T00:45:00Z”,
“2019-06-01T01:00:00Z”、“2019-06-01T01:15:00Z”、“2019-06-01T01:30:00Z”,
“2019-06-01T01:45:00Z”、“2019-06-01T02:00:00Z”、“2019-06-01T02:15:00Z”
),Value=list(8.299942、8.44268、8.440144、8.445086、8.41551、,
8.424382,8.438655,8.46398,8.445853,8.476906),row.names=c(NA,
-10L),class=c(“data.table”,“data.frame”),.internal.selfref=),
F=结构(列表(时间戳=c(“2019-06-01T00:00:00Z”),
“2019-06-01T00:15:00Z”、“2019-06-01T00:30:00Z”、“2019-06-01T00:45:00Z”,
“2019-06-01T01:00:00Z”、“2019-06-01T01:15:00Z”、“2019-06-01T01:30:00Z”,
“2019-06-01T01:45:00Z”、“2019-06-01T02:00:00Z”、“2019-06-01T02:15:00Z”
),Value=list(85.48002,88.071,87.71461,86.2900848,85.50101,
82.4923248,81.78603,82.4504547,82.00605,82.12493),row.names=c(NA,
-10L),class=c(“data.table”,“data.frame”),.internal.selfref=),
G=结构(列表(时间戳=c(“2019-06-01T00:00:00Z”),
“2019-06-01T00:15:00Z”、“2019-06-01T00:30:00Z”、“2019-06-01T00:45:00Z”,
“2019-06-01T01:00:00Z”、“2019-06-01T01:15:00Z”、“2019-06-01T01:30:00Z”,
“2019-06-01T01:45:00Z”、“2019-06-01T02:00:00Z”、“2019-06-01T02:15:00Z”
),值=列表(0.870313,0.862552762,0.8827777,0.8639478,
0.849139452, 0.874981, 0.833493, 0.89307636, 0.8647241,
0.8711139),row.names=c(NA,-10L),class=c(“data.table”,
“data.frame”),.internal.selfref=))
我们可以先用 unlist 将 list 类型的 'Value' 列转换为向量,
然后用 rbindlist 将由 data.table 组成的列表合并为单个 data.table。
这里我们还假设:除了帖子中展示的 unlist 之外,OP 还想做一些其他的预处理。
library(data.table)
# Flatten the list column `Value` in each non-NULL table, then stack all
# tables into one, recording the source list name in `grp`.
# copy() is required because `:=` updates by reference: without it the
# tables stored in `mylist` would themselves be modified.
rbindlist(
  lapply(mylist, function(dat) {
    # NULL elements fall through as NULL and are skipped by rbindlist().
    if (!is.null(dat)) copy(dat)[, Value := unlist(Value)]
  }),
  idcol = 'grp'
)
#grp Timestamp Value
#1: A 2019-06-01T00:00:00Z 100.0509570
#2: A 2019-06-01T00:15:00Z 100.0808260
#3: A 2019-06-01T00:30:00Z 100.1203080
#4: A 2019-06-01T00:45:00Z 100.0534590
#5: A 2019-06-01T01:00:00Z 100.0538250
#6: A 2019-06-01T01:15:00Z 100.0479200
#..
另外请注意:如果表中含有嵌套的 list 列,直接对该列表调用 rbindlist
并不会自动把该列转换为向量,即:
# Bind the list as-is (no unlist beforehand) and inspect the result's structure.
str(rbindlist(mylist, idcol = TRUE))
#Classes ‘data.table’ and 'data.frame': 60 obs. of 3 variables:
# $ .id : chr "A" "A" "A" "A" ...
# $ Timestamp: chr "2019-06-01T00:00:00Z" "2019-06-01T00:15:00Z" #"2019-06-01T00:30:00Z" "2019-06-01T00:45:00Z" ...
# $ Value :List of 60
# ..$ : num 100
# ..$ : num 100
# ..$ : num 100
# ..$ : num 100
# ..$ : num 100
# ..$ : num 100
# ..$ : num 100
# ...
因此,我们必须在调用 rbindlist 之前或之后执行 unlist:
# Step 1: stack every table, tagging each row with its list name in `grp`.
out <- rbindlist(mylist, idcol = 'grp')
# Step 2: flatten the list column `Value` by reference on the new table.
out[, Value := unlist(Value)]
str(out)
#Classes ‘data.table’ and 'data.frame': 60 obs. of 3 variables:
# $ grp : chr "A" "A" "A" "A" ...
#$ Timestamp: chr "2019-06-01T00:00:00Z" "2019-06-01T00:15:00Z" "2019-06-01T00:30:00Z" "2019-06-01T00:45:00Z" ...
#$ Value : num 100 100 100 100 100 ...
更新
如果需要按 Timestamp(时间戳)进行合并,一种做法是先转换为 xts 对象,然后再执行 merge:
library(xts)
# Flag the non-NULL elements; vapply() guarantees a logical vector even
# when `mylist` is empty (sapply() would return a list there).
i1 <- !vapply(mylist, is.null, logical(1))
# Flatten the list column on copies so the tables stored in `mylist`
# are not modified by reference.
mylist1 <- lapply(mylist[i1], function(dat) copy(dat)[, Value := unlist(Value)])
# Build one xts series per table and merge them on the time index.
# The timestamps end in "Z" (UTC), so the zone is given explicitly;
# without `tz` they would be parsed in the session's local time zone.
outn <- Reduce(merge, lapply(mylist1, function(x) {
  xts(
    x$Value,
    order.by = as.POSIXct(x$Timestamp, format = "%Y-%m-%dT%TZ", tz = "UTC")
  )
}))
colnames(outn) <- paste0("Value", seq_len(ncol(outn)))
outn
# Value1 Value2 Value3 Value4 Value5 Value6
#2019-06-01 00:00:00 100.0510 38.89240 30.58984 8.299942 85.48002 0.8703130
#2019-06-01 00:15:00 100.0808 45.77383 29.75237 8.442680 88.07100 0.8625528
#2019-06-01 00:30:00 100.1203 53.21701 27.63596 8.440144 87.71461 0.8827777
#2019-06-01 00:45:00 100.0535 57.08103 26.50898 8.445086 86.29008 0.8639478
#2019-06-01 01:00:00 100.0538 62.10485 25.68263 8.415510 85.50101 0.8491395
#2019-06-01 01:15:00 100.0479 68.58914 24.90998 8.424382 82.49232 0.8749810
#2019-06-01 01:30:00 100.0679 68.98703 24.43334 8.438655 81.78603 0.8334930
#2019-06-01 01:45:00 100.0886 69.51707 23.55244 8.463980 82.45045 0.8930764
#2019-06-01 02:00:00 100.1027 71.49378 23.18649 8.445853 82.00605 0.8647241
#2019-06-01 02:15:00 100.1037 78.59612 22.74029 8.476906 82.12493 0.8711139
或者,另一个选项是使用 dplyr 中的 bind_rows:
library(dplyr)
library(lubridate)
library(purrr)
# Stack the tables row-wise with the source list name stored in `grp`, then
# flatten the list column `Value` to doubles and parse `Timestamp` to date-times.
out1 <- bind_rows(mylist, .id = 'grp')%>%
mutate(Value = flatten_dbl(Value), Timestamp = ymd_hms(Timestamp))
str(out1)
#'data.frame': 60 obs. of 3 variables:
# $ Timestamp: POSIXct, format: "2019-06-01 00:00:00" "2019-06-01 00:15:00" "2019-06-01 00:30:00" "2019-06-01 00:45:00" ...
# $ Value : num 100 100 100 100 100 ...
# $ grp : chr "A" "A" "A" "A" ...
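library(dplyr)
library(lubridate)
library(purrr)
out1 <- bind_rows(mylist, .id = 'grp')%>%
mutate(Value = flatten_dbl(Value), Timestamp = ymd_hms(Timestamp))
str(out1)
#'data.frame': 60 obs. of 3 variables:
# $ Timestamp: POSIXct, format: "2019-06-01 00:00:00" "2019-06-01 00:15:00" "2019-06-01 00:30:00" "2019-06-01 00:45:00" ...
# $ Value : num 100 100 100 100 100 ...
# $ grp : chr "A" "A" "A" "A" ...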
如果我理解正确,OP 希望创建一个单独的 data.table(OP 使用的术语 “merge” 在这里有些误导)。
这可以通过简单地调用 rbindlist() 来实现。
rbindlist() 能够处理 mylist 中的 NULL(空)元素:
library(data.table)
# Stack all tables in `mylist` into a single data.table with an id column.
rbindlist(mylist, idcol = TRUE)
在随后的步骤中,可以一次性把 Timestamp 列强制转换为 POSIXct
(而不必事先逐个处理 mylist 中的各个元素):
我想您需要的是 rbindlist 和 dcast。由于这与 @Uwe 和 @akrun 的答案类似,我也采用同样的做法。
我熟悉rbindlist
但我想me
library(xts)
# Flag the non-NULL elements; vapply() guarantees a logical vector even
# when `mylist` is empty (sapply() would return a list there).
i1 <- !vapply(mylist, is.null, logical(1))
# Flatten the list column on copies so the tables stored in `mylist`
# are not modified by reference.
mylist1 <- lapply(mylist[i1], function(dat) copy(dat)[, Value := unlist(Value)])
# Build one xts series per table and merge them on the time index.
# The timestamps end in "Z" (UTC), so the zone is given explicitly;
# without `tz` they would be parsed in the session's local time zone.
outn <- Reduce(merge, lapply(mylist1, function(x) {
  xts(
    x$Value,
    order.by = as.POSIXct(x$Timestamp, format = "%Y-%m-%dT%TZ", tz = "UTC")
  )
}))
colnames(outn) <- paste0("Value", seq_len(ncol(outn)))
outn
# Value1 Value2 Value3 Value4 Value5 Value6
#2019-06-01 00:00:00 100.0510 38.89240 30.58984 8.299942 85.48002 0.8703130
#2019-06-01 00:15:00 100.0808 45.77383 29.75237 8.442680 88.07100 0.8625528
#2019-06-01 00:30:00 100.1203 53.21701 27.63596 8.440144 87.71461 0.8827777
#2019-06-01 00:45:00 100.0535 57.08103 26.50898 8.445086 86.29008 0.8639478
#2019-06-01 01:00:00 100.0538 62.10485 25.68263 8.415510 85.50101 0.8491395
#2019-06-01 01:15:00 100.0479 68.58914 24.90998 8.424382 82.49232 0.8749810
#2019-06-01 01:30:00 100.0679 68.98703 24.43334 8.438655 81.78603 0.8334930
#2019-06-01 01:45:00 100.0886 69.51707 23.55244 8.463980 82.45045 0.8930764
#2019-06-01 02:00:00 100.1027 71.49378 23.18649 8.445853 82.00605 0.8647241
#2019-06-01 02:15:00 100.1037 78.59612 22.74029 8.476906 82.12493 0.8711139
# Plot the merged object `outn`.
plot(outn)
library(dplyr)
library(lubridate)
library(purrr)
# Stack the tables row-wise with the source list name stored in `grp`, then
# flatten the list column `Value` to doubles and parse `Timestamp` to date-times.
out1 <- bind_rows(mylist, .id = 'grp')%>%
mutate(Value = flatten_dbl(Value), Timestamp = ymd_hms(Timestamp))
str(out1)
#'data.frame': 60 obs. of 3 variables:
# $ Timestamp: POSIXct, format: "2019-06-01 00:00:00" "2019-06-01 00:15:00" "2019-06-01 00:30:00" "2019-06-01 00:45:00" ...
# $ Value : num 100 100 100 100 100 ...
# $ grp : chr "A" "A" "A" "A" ...
library(data.table)
# Stack all tables in `mylist` into a single data.table with an id column.
rbindlist(mylist, idcol = TRUE)
.id Timestamp Value
1: A 2019-06-01T00:00:00Z 100.051
2: A 2019-06-01T00:15:00Z 100.0808
3: A 2019-06-01T00:30:00Z 100.1203
4: A 2019-06-01T00:45:00Z 100.0535
5: A 2019-06-01T01:00:00Z 100.0538
6: A 2019-06-01T01:15:00Z 100.0479
7: A 2019-06-01T01:30:00Z 100.0679
8: A 2019-06-01T01:45:00Z 100.0886
9: A 2019-06-01T02:00:00Z 100.1027
10: A 2019-06-01T02:15:00Z 100.1037
11: B 2019-06-01T00:00:00Z 38.8924
12: B 2019-06-01T00:15:00Z 45.77383
13: B 2019-06-01T00:30:00Z 53.21701
14: B 2019-06-01T00:45:00Z 57.08103
15: B 2019-06-01T01:00:00Z 62.10485
16: B 2019-06-01T01:15:00Z 68.58914
17: B 2019-06-01T01:30:00Z 68.98703
18: B 2019-06-01T01:45:00Z 69.51707
19: B 2019-06-01T02:00:00Z 71.49378
20: B 2019-06-01T02:15:00Z 78.59612
21: C 2019-06-01T00:00:00Z 30.58984
22: C 2019-06-01T00:15:00Z 29.75237
23: C 2019-06-01T00:30:00Z 27.63596
24: C 2019-06-01T00:45:00Z 26.50898
25: C 2019-06-01T01:00:00Z 25.68263
26: C 2019-06-01T01:15:00Z 24.90998
27: C 2019-06-01T01:30:00Z 24.43334
28: C 2019-06-01T01:45:00Z 23.55244
29: C 2019-06-01T02:00:00Z 23.18649
30: C 2019-06-01T02:15:00Z 22.74029
31: E 2019-06-01T00:00:00Z 8.299942
32: E 2019-06-01T00:15:00Z 8.44268
33: E 2019-06-01T00:30:00Z 8.440144
34: E 2019-06-01T00:45:00Z 8.445086
35: E 2019-06-01T01:00:00Z 8.41551
36: E 2019-06-01T01:15:00Z 8.424382
37: E 2019-06-01T01:30:00Z 8.438655
38: E 2019-06-01T01:45:00Z 8.46398
39: E 2019-06-01T02:00:00Z 8.445853
40: E 2019-06-01T02:15:00Z 8.476906
41: F 2019-06-01T00:00:00Z 85.48002
42: F 2019-06-01T00:15:00Z 88.071
43: F 2019-06-01T00:30:00Z 87.71461
44: F 2019-06-01T00:45:00Z 86.29008
45: F 2019-06-01T01:00:00Z 85.50101
46: F 2019-06-01T01:15:00Z 82.49232
47: F 2019-06-01T01:30:00Z 81.78603
48: F 2019-06-01T01:45:00Z 82.45045
49: F 2019-06-01T02:00:00Z 82.00605
50: F 2019-06-01T02:15:00Z 82.12493
51: G 2019-06-01T00:00:00Z 0.870313
52: G 2019-06-01T00:15:00Z 0.8625528
53: G 2019-06-01T00:30:00Z 0.8827777
54: G 2019-06-01T00:45:00Z 0.8639478
55: G 2019-06-01T01:00:00Z 0.8491395
56: G 2019-06-01T01:15:00Z 0.874981
57: G 2019-06-01T01:30:00Z 0.833493
58: G 2019-06-01T01:45:00Z 0.8930764
59: G 2019-06-01T02:00:00Z 0.8647241
60: G 2019-06-01T02:15:00Z 0.8711139
.id Timestamp Value
library(data.table)
# Stack all tables, then convert `Timestamp` once on the combined table with
# lubridate::as_datetime(); the trailing `[]` makes the result print after `:=`.
rbindlist(mylist, idcol = TRUE)[
, Timestamp := lubridate::as_datetime(Timestamp)][]
library(data.table)
# Stack the tables in long format; `ID` holds the name of the source list
# element.
bind_list <- rbindlist(mylist, idcol = "ID")
# Reshape to wide: one row per Timestamp, one column per ID.
# Naming `value.var` explicitly stops dcast() from guessing the value column.
dcast(bind_list, Timestamp ~ ID, value.var = "Value")
Timestamp A B C E F G
1: 2019-06-01T00:00:00Z 100.051 38.8924 30.58984 8.299942 85.48002 0.870313
2: 2019-06-01T00:15:00Z 100.0808 45.77383 29.75237 8.44268 88.071 0.8625528
3: 2019-06-01T00:30:00Z 100.1203 53.21701 27.63596 8.440144 87.71461 0.8827777
4: 2019-06-01T00:45:00Z 100.0535 57.08103 26.50898 8.445086 86.29008 0.8639478
5: 2019-06-01T01:00:00Z 100.0538 62.10485 25.68263 8.41551 85.50101 0.8491395
6: 2019-06-01T01:15:00Z 100.0479 68.58914 24.90998 8.424382 82.49232 0.874981
7: 2019-06-01T01:30:00Z 100.0679 68.98703 24.43334 8.438655 81.78603 0.833493
8: 2019-06-01T01:45:00Z 100.0886 69.51707 23.55244 8.46398 82.45045 0.8930764
9: 2019-06-01T02:00:00Z 100.1027 71.49378 23.18649 8.445853 82.00605 0.8647241
10: 2019-06-01T02:15:00Z 100.1037 78.59612 22.74029 8.476906 82.12493 0.8711139
library(tidyr)
library(dplyr)
# Long format first: every row is tagged with its source list name in `ID`.
bind_list <- bind_rows(mylist, .id = "ID")
# Spread to wide format: one row per Timestamp, one column per ID.
pivot_wider(
  bind_list,
  id_cols = Timestamp,
  names_from = ID,
  values_from = Value
)
Timestamp A B C E F G
1 2019-06-01T00:00:00Z 100.051 38.8924 30.58984 8.299942 85.48002 0.870313
2 2019-06-01T00:15:00Z 100.0808 45.77383 29.75237 8.44268 88.071 0.8625528
3 2019-06-01T00:30:00Z 100.1203 53.21701 27.63596 8.440144 87.71461 0.8827777
4 2019-06-01T00:45:00Z 100.0535 57.08103 26.50898 8.445086 86.29008 0.8639478
5 2019-06-01T01:00:00Z 100.0538 62.10485 25.68263 8.41551 85.50101 0.8491395
6 2019-06-01T01:15:00Z 100.0479 68.58914 24.90998 8.424382 82.49232 0.874981
7 2019-06-01T01:30:00Z 100.0679 68.98703 24.43334 8.438655 81.78603 0.833493
8 2019-06-01T01:45:00Z 100.0886 69.51707 23.55244 8.46398 82.45045 0.8930764
9 2019-06-01T02:00:00Z 100.1027 71.49378 23.18649 8.445853 82.00605 0.8647241
10 2019-06-01T02:15:00Z 100.1037 78.59612 22.74029 8.476906 82.12493 0.8711139