R 如何正确地将df转换为时间序列,并在同一管道中进行清理?
标签:r, dataframe, dplyr, time-series, purrr
我试图将数据帧转换为时间序列对象(`ts`),然后使用过滤器(`bkfilter()`:此过滤器用于清除时间序列中的噪声),这些操作需要按组进行(按 `df` 的 `decil` 列分组)。
以下是我的尝试:
library(dplyr)
library(mFilter)
library(purrr)
# Asker's attempted pipeline (quoted as posted; it is the subject of the
# question, not a working solution). Intended steps: nest `df` by `decil`,
# convert each nested tibble to a `ts` object, band-pass filter each series with
# mFilter::bkfilter(), then unnest back to a flat data frame.
test <- df %>%
# Drop any existing grouping, then create one row per `decil` value with the
# remaining columns stored in the list-column `data`.
# NOTE(review): dplyr documents nest_by() as returning a row-wise data frame;
# confirm how purrr::map() behaves on `data` in that context.
dplyr::ungroup() %>%
dplyr::nest_by(decil) %>%
# Convert each nested tibble to a time series via ts().
dplyr::summarise(data_ts = purrr::map(data, ts)) %>%
# Apply the cleaning filter.
# NOTE(review): `%>%` passes the whole summarised data frame as bkfilter()'s
# first argument, so `data_ts` lands in the second positional slot rather than
# being the series that is filtered; the filter is also not applied per row.
mFilter::bkfilter(data_ts, pl=6,pu=32,type="fixed",drift=FALSE) %>%
# Unnest and ungroup to get a final data frame.
# NOTE(review): `decil` is the nest_by() key above, not the nested list-column,
# so unnest(decil) presumably targets the wrong column — confirm.
tidyr::unnest(decil) %>%
dplyr::ungroup()
数据
`df`
`bkfilter` 需要等间隔的数据,而您的数据并不满足这一点,因此我们改用 loess 平滑器。您可能需要为 loess 尝试不同的 `span` 参数,但这里我们使用默认值。此外,由于所有日期都是 1 月 1 日,我们把该列简化为年份。问题中的数据后来有所更改,但下面的代码使用的是文末注释中给出的原始数据(只是为了减少测试数据,我们仅复制了前 42 行)。
或者,如果您有许多val列:
# Alternative for many value columns: within each `año` group, reduce `decil`
# to its year, then replace every column whose name starts with "val" by the
# fitted values of a default-span loess of that column on `decil`.
df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  mutate(
    across(
      starts_with("val"),
      ~ fitted(loess(. ~ decil))
    )
  ) %>%
  ungroup()
要使用 loess 创建等间距的序列,请改用以下方法:
# Build a regularly spaced (yearly) smoothed series per `año` group:
# `decil` is reduced to its year, a default-span loess is fitted to each value
# column within the group, and the fits are evaluated on every year between
# the group's first and last observed year.
result <- df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  group_modify(function(grp, key) {
    year_span <- range(grp$decil)
    # One row per calendar year, including years with no observation.
    grid <- data.frame(decil = year_span[1]:year_span[2])
    fit_val1 <- loess(val1 ~ decil, data = grp)
    fit_val2 <- loess(val2 ~ decil, data = grp)
    data.frame(
      grid,
      val1 = predict(fit_val1, newdata = grid),
      val2 = predict(fit_val2, newdata = grid)
    )
  }) %>%
  ungroup()
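(以下为上文代码的重复摘录,原文在此处被截断;完整代码见下文的 `result <- df %>% ...` 代码块。)
result <- df %>%
group_by(año) %>%
mutate(decil = lubridate::year(decil)) %>%
group_modify(~ {
rng ...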
library(dplyr)
# Within each `año` group, reduce `decil` to its year and replace `val1` and
# `val2` by the fitted values of a default-span loess of each on `decil`.
df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  mutate(
    val1 = fitted(loess(val1 ~ decil)),
    val2 = fitted(loess(val2 ~ decil))
  ) %>%
  ungroup()
# Alternative for many value columns: within each `año` group, reduce `decil`
# to its year, then replace every column whose name starts with "val" by the
# fitted values of a default-span loess of that column on `decil`.
df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  mutate(
    across(
      starts_with("val"),
      ~ fitted(loess(. ~ decil))
    )
  ) %>%
  ungroup()
# Build a regularly spaced (yearly) smoothed series per `año` group:
# `decil` is reduced to its year, a default-span loess is fitted to each value
# column within the group, and the fits are evaluated on every year between
# the group's first and last observed year.
result <- df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  group_modify(function(grp, key) {
    year_span <- range(grp$decil)
    # One row per calendar year, including years with no observation.
    grid <- data.frame(decil = year_span[1]:year_span[2])
    fit_val1 <- loess(val1 ~ decil, data = grp)
    fit_val2 <- loess(val2 ~ decil, data = grp)
    data.frame(
      grid,
      val1 = predict(fit_val1, newdata = grid),
      val2 = predict(fit_val2, newdata = grid)
    )
  }) %>%
  ungroup()
library(zoo)
# Convert the long `result` table into wide series, one column per `año`
# level and indexed by `decil`, separately for each value column; then coerce
# each zoo object to a base `ts` object.
# `result` has the columns año, decil, val1, val2, so dropping the other value
# column by name leaves the three columns read.zoo() needs.
val1.z <- read.zoo(
  result[names(result) != "val2"],
  index.column = "decil",
  split = "año"
)
val1.ts <- as.ts(val1.z)

val2.z <- read.zoo(
  result[names(result) != "val1"],
  index.column = "decil",
  split = "año"
)
val2.ts <- as.ts(val2.z)
# Original sample data used by the snippets above: a 42-row tibble
# (class tbl_df/tbl/data.frame) with four columns.
#   año   - factor with ten levels ("Decil 1" ... "Decil 10"); only codes 1-3
#           occur here, 14 rows each. The pipelines group by this column.
#   decil - Date vector; the same 14 dates repeat for every group.
#   val1, val2 - numeric value columns.
# NOTE(review): the column names look swapped relative to their contents
# (`año` holds "Decil N" labels while `decil` holds dates) — confirm against
# the source data before renaming anything.
df <- structure(list(año = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("Decil 1", "Decil 2", "Decil 3", "Decil 4",
"Decil 5", "Decil 6", "Decil 7", "Decil 8", "Decil 9", "Decil 10"
), class = "factor"), decil = structure(c(8035, 9496, 10227,
10957, 11688, 12418, 12784, 13149, 13879, 14610, 15340, 16071,
16801, 17532, 8035, 9496, 10227, 10957, 11688, 12418, 12784,
13149, 13879, 14610, 15340, 16071, 16801, 17532, 8035, 9496,
10227, 10957, 11688, 12418, 12784, 13149, 13879, 14610, 15340,
16071, 16801, 17532), class = "Date"), val1 = c(0.285801450939386,
-0.299493284604054, 0.423776690674324, 0.48819479659131, -0.00851835182683933,
0.670710736839509, 1.0584695348906, 0.241749748695944, 0.0203934290972816,
-0.298146398803882, 0.454536882635523, 0.0317493839324935, 0.399645473642857,
0.149656209777629, 0.285801450939386, -0.299493284604054, 0.423776690674324,
0.48819479659131, -0.00851835182683933, 0.670710736839509, 1.0584695348906,
0.241749748695944, 0.0203934290972816, -0.298146398803882, 0.454536882635523,
0.0317493839324935, 0.399645473642857, 0.149656209777629, 0.285801450939386,
-0.299493284604054, 0.423776690674324, 0.48819479659131, -0.00851835182683933,
0.670710736839509, 1.0584695348906, 0.241749748695944, 0.0203934290972816,
-0.298146398803882, 0.454536882635523, 0.0317493839324935, 0.399645473642857,
0.149656209777629), val2 = c(0.308138265449932, -2.73923609850588,
-0.116791699821611, 1.276823919767, -0.591033604229833, 2.217410209762,
-1.22771361710334, 2.91323734975109, 1.9403678544531, -1.710307316049,
0.114882675671299, 1.31658931355581, 1.46477971543751, 0.271712366085317,
0.274660079065288, -1.20391288467938, 0.221116802499613, 1.68878772891571,
-0.0553564396020631, 2.47304289689454, -1.49473828679342, 2.06995781551323,
0.995111688295987, -1.23590101185821, -0.0513493069577327, 1.57518955283768,
1.49952051319558, 0.569875774759632, 0.0595105282871455, -1.98229069194358,
0.692781223252039, 1.45357299983422, 0.274519154273341, 2.50570229399105,
-1.58798738651613, 1.8318771262172, 0.541185894582713, -1.09932567808292,
0.159443784781767, 1.38386952581564, 1.42469406477504, 0.410413545862179
)), row.names = c(NA, -42L), class = c("tbl_df", "tbl", "data.frame"))