自动将函数应用于R中的多个列表

自动将函数应用于R中的多个列表,r,list,R,List,我有许多包含数据帧的列表,我有一些代码,当其中一个列表被命名时,这些代码可以执行我想要的功能。我想做的是使流程自动化,以便在我的环境中的每个列表上执行功能(环境还包含我不希望受影响的其他内容)。在创建新对象时,我还希望根据列表名自动命名它们。请注意,函数在列表的每个元素中的一个变量上执行,而不是在列表或所有变量上作为一个整体执行 在下面的代码中,M10210102.list是其中一个列表的名称INT/EXT任何内容都是列表元素中包含的变量之一。在下面的故障代码中,namesMCH0list是一个

我有许多包含数据帧的列表,我有一些代码,当其中一个列表被命名时,这些代码可以执行我想要的功能。我想做的是使流程自动化,以便在我的环境中的每个列表上执行功能(环境还包含我不希望受影响的其他内容)。在创建新对象时,我还希望根据列表名自动命名它们。请注意,函数在列表的每个元素中的一个变量上执行,而不是在列表或所有变量上作为一个整体执行

在下面的代码中,
M10210102.list
是其中一个列表的名称
INT
/
EXT
任何内容都是列表元素中包含的变量之一。在下面的故障代码中,
namesMCH0list
是一个仅包含所有列表名称的列表
ONT.list
是通过拆分ONT.list创建所有较小列表的主列表

我的问题是:有没有一种方法可以有效地自动化这些功能?提前谢谢

以下是我要执行的功能:

PercChangeDiff <- lapply(M10210102.list, function(x) {

  INTdif <- c(NA, diff(x[["INTprice"]]))
  EXTdif <- c(NA, diff(x[["EXTprice"]]))
  INTperc <- (INTdif / x[["INTprice"]]) * 100
  EXTperc <- (EXTdif / x[["EXTprice"]]) * 100
  return(list(x[["WEEK"]], INTperc, EXTperc))
}
)

for(i in seq_along(PercChangeDiff)){
  names(PercChangeDiff[[i]]) <- c("WEEK","INTpercent", "EXTpercent")
}


#removing elements from list if they have fewer than 34 observations 
for (i in rev(seq_along(PercChangeDiff))){
  if (length(PercChangeDiff[[i]][["INTpercent"]]) < 34) (PercChangeDiff[[i] <- NULL)  
}

#removing elements from list if INTprice or EXTprice does not change
for (i in rev(seq_along(PercChangeDiff))){
  if (length(unique(PercChangeDiff[[i]][["INTpercent"]])) < 2) (PercChangeDiff[[i]] <- NULL) 
  if (length(unique(PercChangeDiff[[i]][["EXTpercent"]])) < 2) (PercChangeDiff[[i]] <- NULL) 
}

#############

####### AGGREGATING MEANS PER DATE FOR ALL ARTS WITHIN MCH0 #######

#removing first date 
for (i in seq_along(PercChangeDiff)){
  PercChangeDiff[[i]][["WEEK"]][[1]] <- NA
}

#aggregating means
library(tidyverse)
PercChangeAvg <- map(PercChangeDiff,as_tibble) %>%
  bind_rows %>%
  group_by(WEEK) %>%
  summarize_all(mean)


 PercChangeAvg <- PercChangeAvg[complete.cases(PercChangeAvg), ]

############### CREATING TIME SERIES ####################

#create a list of timeseries values for INT and EXT
timeINTavg <- ts(PercChangeAvg$INTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))
timeEXTavg <- ts(PercChangeAvg$EXTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))


#applying ccf to list of timeseries data
crossCorrAvg <- ccf(timeINTavg, timeEXTavg, lag.max = 100)

考虑使用内部数据帧并从中运行筛选。为了完全自动化,在主函数中概括您的过程,返回最后一个对象crossCorrAvg


master_function_过程请求帮助时,您应该包括一个简单的示例输入和所需输出,用于测试和验证可能的解决方案。存在一些拼写错误和不一致。您永远不会返回INTdif、EXTdif或INTprice和EXTprice。最后,crossCorr从未被分配,所以很可能是crossCorrAvg?@MrFlick鉴于我有很多大的列表,你建议复制什么?正如常见问题解答所说,
dput()
输出可能很难处理。请举一个只有两个对象或其他对象的小例子。我们不需要看到你的“真实”数据。我们只需要一些东西来测试,如果它在测试中起作用,它应该在您的真实数据上起作用。@Parfait INTdif/EXTdif不需要返回,它们被用来计算INTperc/EXTperc(我知道这可能不是最有效的编码方式,但它是有效的)。INTprice和EXT price是列表元素中的现有变量。关于crossCorr,你说得对,这是以前的代码的一部分,从未被改编成最新版本,应该是crossCorr Avg。谢谢你写出来。然而,它产生了两个情节,这很奇怪。有没有办法为每个现有列表创建单独的PercChangeAvg列表,以便我在需要时可以引用它们?在
return(PercChangeAvg)
末尾添加一个返回行,或者在list return、
return(list(PercChangeAvg,crossCorrAvg)
末尾添加一个返回行。默认情况下,函数返回最后一行(这里是crossCorrAvg)。不确定绘图的位置。请确保从干净/空的R会话开始。
for (g in seq_along(namesMCH0list)){
  lapply(get(namesMCH0list[g]), function(x) {

    INTdif <- c(NA, diff(x[["INTprice"]]))
    EXTdif <- c(NA, diff(x[["EXTprice"]]))
    INTperc <- (INTdif / x[["INTprice"]]) * 100
    EXTperc <- (EXTdif / x[["EXTprice"]]) * 100
    return( assign(paste("PercChangeDiff", paste(namesMCH0list[g]), sep = "."), list(x[["WEEK"]], INTperc, EXTperc)))
  }
  )
}

##this does not work ##
for (i in seq_along(ONT.list)){
  x <<- paste(namesMCH0list[i])
  for (g in rev(seq_along(get(x)))){
    if (length((get(x))[[g]][["INTprice"]]) < 20) ((get(x))[[g]] <<- NULL)  
  }
}


for (g in seq_along(ONT.list)){ 
  x <<- paste(namesMCH0list[g])
  lapply(paste(x), function(x) { 
  if (length(get(x)[["INTprice"]]) < 20) (NULL)
})
}


for(w in seq_along(ONT.list)){
  lapply(get(namesMCH0list[g]), function (x) {
    if(length(x[["INTprice"]] < 34 )) (x <- NULL)
  })
  NULL
}
 list(structure(list(WEEK = structure(c(17441, 17448, 17455, 17462, 17469, 17476, 17483, 17490, 17497, 17504, 17511, 17518, 17546, 17553, 17560, 17567, 17574, 17581, 17588, 17595, 17602, 17609, 17616, 17623, 17630, 17637, 17644, 17651, 17658, 17665, 17672, 17679, 17686, 17693), class = "Date"), REP_ART_UOM = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "20180929-EA", class = "factor"), MCH_0_CD = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "M10210101", class = "factor"), INTprice = c(3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97), EXTprice = c(4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48)), row.names = c(931L, 3626L, 6325L, 9021L, 11709L, 14368L, 17008L, 19764L, 22528L, 25193L, 27849L, 30489L, 33126L, 35769L, 38426L, 41141L, 44030L, 46911L, 49770L, 52643L, 55538L, 58423L, 61320L, 64256L, 67195L, 70117L, 73049L, 75982L, 78950L, 81924L, 84886L, 87848L, 90816L, 93778L), class = "data.frame"), 
structure(list(WEEK = structure(c(17441, 17448, 17455, 17462, 
17469, 17476, 17483, 17490, 17497, 17504, 17511, 17518, 17546, 
17553, 17560, 17567, 17574, 17581, 17588, 17595, 17602, 17609, 
17616, 17623, 17630, 17637, 17644, 17651, 17658, 17665, 17672, 
17679, 17686, 17693), class = "Date"), REP_ART_UOM = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = "20323113-EA", class = "factor"), MCH_0_CD = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = "M10210101", class = "factor"), INTprice = c(3.47, 
3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 
3.47, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2.47, 2.47, 2.47, 
2.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47), EXTprice = c(2, 
2, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 2, 2, 2, 2, 
2.03, 2, 2, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2)), row.names = c(1138L, 3834L, 
6537L, 9232L, 11917L, 14547L, 17199L, 19956L, 22718L, 25381L, 
28036L, 30673L, 33312L, 35955L, 38609L, 41357L, 44247L, 47124L, 
49984L, 52859L, 55752L, 58636L, 61536L, 64470L, 67408L, 70330L, 
73262L, 76204L, 79171L, 82147L, 85107L, 88068L, 91035L, 93994L
), class = "data.frame"))
master_function_process <- function(currlist) {
    PercChangeDiff <- lapply(currlist, function(x) {  
        # NEW DATAFRAME COLUMNS 
        x$INTdif <- c(NA, diff(x$INTprice))
        x$EXTdif <- c(NA, diff(x$EXTprice))
        x$INTpercent <- (x$INTdif / x$INTprice) * 100
        x$EXTpercent <- (x$EXTdif / x$EXTprice) * 100

        # DATAFRAME SUBSETTED COLUMNS
        tmp <- x[c("WEEK", "INTprice", "EXTprice", "INTpercent", "EXTpercent")]

        # FILTERS
        tmp <- tmp[tmp$INTpercent >= 34,]
        tmp <- tmp[tmp$INTprice >= 2 | tmp$EXTprice >= 2,]

        # REMOVE FIRST DATE
        tmp$WEEK[[1]] <- NA
        return(tmp)
   })

   # AVERAGE AGGREGATION BY WEEK (NO map)
   PercChangeAvg <- bind_rows(PercChangeDiff) %>%
        group_by(WEEK) %>%
        summarize_all(mean) %>%
        filter(complete.cases(.))

   # TIME SERIES VALUES FOR INT AND EXT
   timeINTavg <- ts(PercChangeAvg$INTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))
   timeEXTavg <- ts(PercChangeAvg$EXTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))

  # APPLY CCF AND ADD snames
  crossCorrAvg <- ccf(timeINTavg, timeEXTavg, lag.max = 100)
  crossCorrAvg <- lapply(crossCorrAvg, function(i) within(i, snames <- names(i))
}

# PROCESS ALL LISTS RETRIEVED WITH mget
master_crossCorrAvg_list <- lapply(mget(namesMCH0list), master_function_process)