R 如何正确地将df转换为时间序列,并在同一管道中进行清理?

R 如何正确地将df转换为时间序列,并在同一管道中进行清理?,r,dataframe,dplyr,time-series,purrr,R,Dataframe,Dplyr,Time Series,Purrr,我试图将数据帧转换为时间序列对象(ts),然后使用过滤器(bkfilter():此过滤器清除时间序列噪声),这些任务将按组应用(decildf列) 以下是我的尝试: library(dplyr) library(mFilter) library(purrr) test <- df %>% ## nesting dataframe by group ## dplyr::ungroup() %>%

我试图将数据帧转换为时间序列对象(`ts`),然后对其应用过滤器(`bkfilter()`:此过滤器用于去除时间序列中的噪声)。这些操作需要按组(`df` 的 `decil` 列)分别应用。

以下是我的尝试:

library(dplyr)
library(mFilter)
library(purrr)

# Asker's attempt (quoted as posted): nest `df` by `decil`, turn each nested
# tibble into a `ts` object, band-pass filter it with mFilter::bkfilter(),
# then unnest and ungroup to get a flat data frame back.
test <-  df  %>%
            ## nesting dataframe by group ##
             dplyr::ungroup() %>% 
             dplyr::nest_by(decil) %>%
  
            ## converting each nested tibble to time-series ##
            dplyr::summarise(data_ts = purrr::map(data, ts)) %>%
                      
            ## applying cleaning filter to each nested time-series tibble ##
            # NOTE(review): `%>%` supplies the whole summarised tibble as
            # bkfilter()'s first argument, so `data_ts` lands in a later
            # positional slot instead of being the series that is filtered.
            # Presumably the filter was meant to be mapped over the `data_ts`
            # list-column -- confirm intent.
            mFilter::bkfilter(data_ts, pl=6,pu=32,type="fixed",drift=FALSE) %>%
  
            ## unnest and ungroup tibbles to get a final df ##
            # NOTE(review): `decil` is the nesting key here, not the
            # list-column; presumably `data_ts` was the column to unnest -- TODO
            # confirm.
            tidyr::unnest(decil) %>%
            dplyr::ungroup()


数据

df

bkfilter 需要等间隔的数据,而您的数据并非等间隔,因此我们改用 loess 平滑器。您可能希望为 loess 尝试不同的 span 参数值,但这里我们使用默认值。此外,由于所有日期均为 1 月 1 日,因此我们将该列简化为年份。问题中的数据后来有所改动,但下面的代码使用的是末尾注释中显示的原始数据(只是我们仅复制了前 42 行,以减少测试数据)。

或者,如果您有许多val列:

# Within each `año` group, reduce the date column to its year and replace
# every column whose name starts with "val" by the fitted values of a
# default-span loess of that column on the year.
df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  mutate(across(starts_with("val"), function(v) fitted(loess(v ~ decil)))) %>%
  ungroup()
要使用 loess 创建等间隔的序列,请改用以下方法:

# Build a regularly spaced yearly series for each `año` group: fit a
# default-span loess to val1 and to val2 against the year, then predict both
# at every integer year between the group's first and last observed year.
result <- df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  group_modify(function(grp, key) {
    # Every year from the earliest to the latest one seen in this group.
    year_grid <- data.frame(decil = seq(min(grp$decil), max(grp$decil)))
    fit_val1 <- loess(val1 ~ decil, data = grp)
    fit_val2 <- loess(val2 ~ decil, data = grp)
    data.frame(
      year_grid,
      val1 = predict(fit_val1, year_grid),
      val2 = predict(fit_val2, year_grid)
    )
  }) %>%
  ungroup()
result <- df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  group_modify(~ {
    rng
library(dplyr)

# Per `año` group: reduce the date column to its year, then overwrite val1
# and val2 with the fitted values of a default-span loess on that year.
df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  mutate(
    val1 = fitted(loess(val1 ~ decil)),
    val2 = fitted(loess(val2 ~ decil))
  ) %>%
  ungroup()
# Same smoothing as the explicit val1/val2 version, but applied to every
# column whose name starts with "val": each is replaced, per `año` group, by
# the fitted values of a default-span loess on the year.
df %>%
  group_by(año) %>%
  mutate(
    decil = lubridate::year(decil),
    across(starts_with("val"), function(col) fitted(loess(col ~ decil)))
  ) %>%
  ungroup()
# For each `año` group, evaluate default-span loess fits of val1 and val2 on
# a complete run of integer years spanning the group's observed years, so
# the output is regularly spaced.
result <- df %>%
  group_by(año) %>%
  mutate(decil = lubridate::year(decil)) %>%
  group_modify(function(grp, key) {
    first_year <- min(grp$decil)
    last_year <- max(grp$decil)
    grid <- data.frame(decil = first_year:last_year)
    smooth_val1 <- predict(loess(val1 ~ decil, data = grp), newdata = grid)
    smooth_val2 <- predict(loess(val2 ~ decil, data = grp), newdata = grid)
    data.frame(grid, val1 = smooth_val1, val2 = smooth_val2)
  }) %>%
  ungroup()
library(zoo)

# Reshape `result` into one wide zoo series per value column (one column per
# `año` level, indexed by year), then convert each to a regular `ts` object.
# NOTE(review): selecting by name assumes `result` has exactly the columns
# año, decil, val1, val2 in that order -- confirm against where it is built.
val1_cols <- c("año", "decil", "val1")
val2_cols <- c("año", "decil", "val2")

val1.z <- read.zoo(result[val1_cols], split = "año", index.column = "decil")
val2.z <- read.zoo(result[val2_cols], split = "año", index.column = "decil")

val1.ts <- as.ts(val1.z)
val2.ts <- as.ts(val2.z)
# Test data: 42 rows = 3 groups ("Decil 1".."Decil 3" out of 10 factor
# levels) x 14 irregularly spaced dates. The dates and val1 are the same 14
# values repeated for every group; val2 differs per group.
df <- structure(
  list(
    año = factor(
      rep(c("Decil 1", "Decil 2", "Decil 3"), each = 14L),
      levels = paste("Decil", 1:10)
    ),
    # Dates stored as days since 1970-01-01.
    decil = structure(
      rep(
        c(
          8035, 9496, 10227, 10957, 11688, 12418, 12784,
          13149, 13879, 14610, 15340, 16071, 16801, 17532
        ),
        times = 3L
      ),
      class = "Date"
    ),
    val1 = rep(
      c(
        0.285801450939386, -0.299493284604054, 0.423776690674324,
        0.48819479659131, -0.00851835182683933, 0.670710736839509,
        1.0584695348906, 0.241749748695944, 0.0203934290972816,
        -0.298146398803882, 0.454536882635523, 0.0317493839324935,
        0.399645473642857, 0.149656209777629
      ),
      times = 3L
    ),
    val2 = c(
      # Decil 1
      0.308138265449932, -2.73923609850588, -0.116791699821611,
      1.276823919767, -0.591033604229833, 2.217410209762,
      -1.22771361710334, 2.91323734975109, 1.9403678544531,
      -1.710307316049, 0.114882675671299, 1.31658931355581,
      1.46477971543751, 0.271712366085317,
      # Decil 2
      0.274660079065288, -1.20391288467938, 0.221116802499613,
      1.68878772891571, -0.0553564396020631, 2.47304289689454,
      -1.49473828679342, 2.06995781551323, 0.995111688295987,
      -1.23590101185821, -0.0513493069577327, 1.57518955283768,
      1.49952051319558, 0.569875774759632,
      # Decil 3
      0.0595105282871455, -1.98229069194358, 0.692781223252039,
      1.45357299983422, 0.274519154273341, 2.50570229399105,
      -1.58798738651613, 1.8318771262172, 0.541185894582713,
      -1.09932567808292, 0.159443784781767, 1.38386952581564,
      1.42469406477504, 0.410413545862179
    )
  ),
  row.names = c(NA, -42L),
  class = c("tbl_df", "tbl", "data.frame")
)