R:通过函数传递 data.table 的 `by` 参数

R 在data.table';s';由';,r,data.table,evaluation,R,Data.table,Evaluation,我想写一个函数outer\u-fun(),它做一些事情,还调用另一个函数internal\u-fun()。outer\u-fun()中的所有参数都传递给internal\u-fun() internal\u fun()对数据表进行一些计算(这是两个函数的参数)。要通过函数传递的另一个参数是by 以下是我所拥有的东西的草图: 库(data.table) 数据(“CO2”) setDT(二氧化碳) 我最终会通过以字符形式提供by信息来解决这个问题 library(data.table) data(

我想写一个函数 `outer_fun()`,它做一些事情,还会调用另一个函数 `inner_fun()`。`outer_fun()` 中的所有参数都会传递给 `inner_fun()`。

`inner_fun()` 对 data.table 进行一些计算(该 data.table 是这两个函数共同的参数)。需要通过函数传递的另一个参数是 `by`。

以下是我所拥有的东西的草图:

library(data.table)
data("CO2")
setDT(CO2)

我最终通过以字符形式提供 `by` 信息解决了这个问题:

library(data.table)

# Load the CO2 dataset that ships with R into the workspace.
data("CO2")
# Convert CO2 to a data.table by reference so the DT[i, j, by] syntax used
# below works on it.
setDT(CO2)

# Entry point: captures the grouping specification exactly as the caller
# wrote it and hands it, together with the table, to inner_fun().
#
# DT: table passed through to inner_fun() unchanged.
# by: grouping specification. It is captured with substitute(), so
#     inner_fun() receives the unevaluated expression (a symbol, a call
#     such as .(a, b), or a string constant) rather than its value.
#
# Returns whatever inner_fun() returns.
outer_fun <- function(DT, by) {
  # ... any additional work would happen here ...
  inner_fun(DT, substitute(by))
}

# Mean of `uptake` per `Plant` plus the caller-supplied grouping columns.
#
# DT: a data.table containing at least the columns `Plant` and `uptake`.
# by: grouping columns, either
#       * a bare symbol (as captured by substitute() in outer_fun()),
#       * a character vector / string constant of column names, or
#       * an unevaluated .(), list() or c() call whose arguments are column
#         names, bare or quoted.
#
# Returns a data.table with `Plant`, the requested grouping columns and a
# `mean` column.
#
# Only the head of a wrapper call is stripped. Filtering the whole name
# vector against c(".", "list", "c") would silently drop a real grouping
# column that happens to be called `c`, `list` or `.`.
inner_fun <- function(DT, by) {
  if (is.call(by)) {
    fn <- by[[1L]]
    parts <- as.list(by)[-1L]
    is_wrapper <- is.symbol(fn) && as.character(fn) %in% c(".", "list", "c")
    is_column <- vapply(
      parts,
      function(part) {
        is.symbol(part) || (is.character(part) && length(part) == 1L)
      },
      logical(1L)
    )
    # Anything else (e.g. `Treatment == "chilled"`) cannot be expressed as a
    # character vector of column names, so fail with a clear message instead
    # of passing operator names on as if they were columns.
    if (!is_wrapper || !all(is_column)) {
      stop(
        "`by` must be a column name, a character vector, or a ",
        "`.()`/`list()`/`c()` call of column names; got: ",
        paste(deparse(by), collapse = " "),
        call. = FALSE
      )
    }
    by_cols <- vapply(parts, as.character, character(1L))
  } else {
    # A symbol or an already-evaluated character vector.
    by_cols <- as.character(by)
  }

  # `Plant` is always the first grouping column.
  by_cols <- c("Plant", by_cols)

  DT[, .(mean = mean(uptake)), by = by_cols]
}

# Single column name, unquoted; `[1:3]` keeps the first three result rows.
outer_fun(CO2, by = Type)[1:3]
#>    Plant   Type     mean
#> 1:   Qn1 Quebec 33.22857
#> 2:   Qn2 Quebec 35.15714
#> 3:   Qn3 Quebec 37.61429


# Several unquoted column names wrapped in .() ...
outer_fun(CO2, by = .(Type, Treatment))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429

# ... or wrapped in list(); same result as with .().
outer_fun(CO2, by = list(Type, Treatment))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429


# Single column name given as a string.
outer_fun(CO2, by = "Type")[1:3]
#>    Plant   Type     mean
#> 1:   Qn1 Quebec 33.22857
#> 2:   Qn2 Quebec 35.15714
#> 3:   Qn3 Quebec 37.61429


# Several column names given as strings, combined with c() ...
outer_fun(CO2, by = c("Type", "Treatment"))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429

# ... or combined with list().
outer_fun(CO2, by = list("Type", "Treatment"))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429
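library(data.table)
data("CO2")
setDT(CO2)
outer_fun <- function(DT, by) {
  # some other stuff
  by <- substitute(by)
  inner_fun(DT, by)
}
inner_fun <- function(DT, by) {
  # add Plant to the by Information
  byFun <- c("Plant", as.character(by))
  # remove list oder c()- function names
  byFun <- byFun[!byFun %in% c(".", "list", "c")]
  DT[, .(mean = mean(uptake)),
     by = byFun]
}
# single column name unquoted
outer_fun(CO2, by = Type)[1:3]
#>    Plant   Type     mean
#> 1:   Qn1 Quebec 33.22857
#> 2:   Qn2 Quebec 35.15714
#> 3:   Qn3 Quebec 37.61429
# list of column names unquoted
outer_fun(CO2, by = .(Type, Treatment))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429
outer_fun(CO2, by = list(Type, Treatment))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429
# single column name as string
outer_fun(CO2, by = "Type")[1:3]
#>    Plant   Type     mean
#> 1:   Qn1 Quebec 33.22857
#> 2:   Qn2 Quebec 35.15714
#> 3:   Qn3 Quebec 37.61429
# multiple column names as string
outer_fun(CO2, by = c("Type", "Treatment"))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429
outer_fun(CO2, by = list("Type", "Treatment"))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429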
以下是一个选项:


`outer_fun`(原文此处的代码片段在提取时丢失)

我觉得还是让 `[.data.table` 函数本身来处理 `by` 参数更好。这将使 `inner_fun` 和 `outer_fun` 更加简单。缺点是:其他固定的分组变量(如 `Plant`)需要在 `outer_fun` 的 `by` 参数中一并提供。

# Thin wrapper: hands DT and every additional argument on to inner_fun()
# through `...`. Nothing is captured or evaluated at this level, so an
# argument such as `by = .(Plant, Type)` reaches inner_fun() as written.
outer_fun <- function(DT, ...) {
    inner_fun(DT, ...)
}
# Computes mean(uptake) as a column named `mean`. The arguments in `...` are
# placed after j in the `[` call on DT, so a `by = ...` supplied by the caller
# is interpreted by `[` itself rather than by this function.
# NOTE(review): assumes DT is a data.table with an `uptake` column; the .()
# in j relies on data.table's `[` method.
inner_fun <- function(DT, ...) {
    DT[, .(mean = mean(uptake)), ...]
}

您不想使用像 "Type" 这样的字符串吗?对于 `by` 中的两列或更多列,您希望使用哪种语法?
嗯,两者都要(字符串和表达式)。我想保留 data.table 支持的所有写法。因此对于两列,应当支持 `c("col1", "col2")`、`"col1,col2"` 和 `.(col1, col2)`。
我应该补充一点:我还需要保留对 `col1 > 5` 之类表达式的支持。
我认为 @jangorecki 有一个处理此类用例的 PR。
这不适用于
`outer_fun(CO2, outby = .(Type, Treatment))`。在 `inner_fun()` 内,`substitute(DT[, .(mean = mean(uptake)), .(Plant, BY)], list(BY = inby))` 会产生 `DT[, .(mean = mean(uptake)), .(Plant, .(Type, Treatment))]`,其中 `by` 中的嵌套列表会导致 `eval()` 过程中出现错误。
我也遇到过这种情况。:/ 对不起,我之前没有说清楚自己到底在找什么(这条信息在简化用例的过程中丢失了)。我想要的解决方案是:可以向 `by` 传入任何内容,就像直接操作 data.table 时一样。请查看我的编辑。
感谢您的建议,这确实简化了很多。但我确实需要在 `by` 中固定一部分分组变量。
library(data.table)

# Load the CO2 dataset that ships with R into the workspace.
data("CO2")
# Convert CO2 to a data.table by reference so the DT[i, j, by] syntax used
# below works on it.
setDT(CO2)

# Entry point: captures the grouping specification exactly as the caller
# wrote it and hands it, together with the table, to inner_fun().
#
# DT: table passed through to inner_fun() unchanged.
# by: grouping specification. It is captured with substitute(), so
#     inner_fun() receives the unevaluated expression (a symbol, a call
#     such as .(a, b), or a string constant) rather than its value.
#
# Returns whatever inner_fun() returns.
outer_fun <- function(DT, by) {
  # ... any additional work would happen here ...
  inner_fun(DT, substitute(by))
}

# Mean of `uptake` per `Plant` plus the caller-supplied grouping columns.
#
# DT: a data.table containing at least the columns `Plant` and `uptake`.
# by: grouping columns, either
#       * a bare symbol (as captured by substitute() in outer_fun()),
#       * a character vector / string constant of column names, or
#       * an unevaluated .(), list() or c() call whose arguments are column
#         names, bare or quoted.
#
# Returns a data.table with `Plant`, the requested grouping columns and a
# `mean` column.
#
# Only the head of a wrapper call is stripped. Filtering the whole name
# vector against c(".", "list", "c") would silently drop a real grouping
# column that happens to be called `c`, `list` or `.`.
inner_fun <- function(DT, by) {
  if (is.call(by)) {
    fn <- by[[1L]]
    parts <- as.list(by)[-1L]
    is_wrapper <- is.symbol(fn) && as.character(fn) %in% c(".", "list", "c")
    is_column <- vapply(
      parts,
      function(part) {
        is.symbol(part) || (is.character(part) && length(part) == 1L)
      },
      logical(1L)
    )
    # Anything else (e.g. `Treatment == "chilled"`) cannot be expressed as a
    # character vector of column names, so fail with a clear message instead
    # of passing operator names on as if they were columns.
    if (!is_wrapper || !all(is_column)) {
      stop(
        "`by` must be a column name, a character vector, or a ",
        "`.()`/`list()`/`c()` call of column names; got: ",
        paste(deparse(by), collapse = " "),
        call. = FALSE
      )
    }
    by_cols <- vapply(parts, as.character, character(1L))
  } else {
    # A symbol or an already-evaluated character vector.
    by_cols <- as.character(by)
  }

  # `Plant` is always the first grouping column.
  by_cols <- c("Plant", by_cols)

  DT[, .(mean = mean(uptake)), by = by_cols]
}

# Single column name, unquoted; `[1:3]` keeps the first three result rows.
outer_fun(CO2, by = Type)[1:3]
#>    Plant   Type     mean
#> 1:   Qn1 Quebec 33.22857
#> 2:   Qn2 Quebec 35.15714
#> 3:   Qn3 Quebec 37.61429


# Several unquoted column names wrapped in .() ...
outer_fun(CO2, by = .(Type, Treatment))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429

# ... or wrapped in list(); same result as with .().
outer_fun(CO2, by = list(Type, Treatment))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429


# Single column name given as a string.
outer_fun(CO2, by = "Type")[1:3]
#>    Plant   Type     mean
#> 1:   Qn1 Quebec 33.22857
#> 2:   Qn2 Quebec 35.15714
#> 3:   Qn3 Quebec 37.61429


# Several column names given as strings, combined with c() ...
outer_fun(CO2, by = c("Type", "Treatment"))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429

# ... or combined with list().
outer_fun(CO2, by = list("Type", "Treatment"))[1:3]
#>    Plant   Type  Treatment     mean
#> 1:   Qn1 Quebec nonchilled 33.22857
#> 2:   Qn2 Quebec nonchilled 35.15714
#> 3:   Qn3 Quebec nonchilled 37.61429
# Captures `outby` as the unevaluated expression the caller typed and passes
# it, together with DT, to inner_fun(), which interprets it.
#
# DT:    table passed through to inner_fun() unchanged.
# outby: grouping specification; never evaluated here.
#
# Returns whatever inner_fun() returns.
outer_fun <- function(DT, outby) {
  grouping_expr <- substitute(outby)
  inner_fun(DT, grouping_expr)
}

# Mean of `uptake` grouped by `Plant` plus whatever `inby` describes.
#
# DT:   a data.table containing at least the columns `Plant` and `uptake`.
# inby: unevaluated grouping specification (as produced by substitute() in
#       outer_fun()). Accepted forms:
#         * a bare column symbol, e.g. Type
#         * a bare expression, e.g. Treatment == "chilled"
#         * a .(), list() or c() call of symbols, expressions and/or strings
#         * a string or character vector of column names; each string may
#           hold several comma-separated names ("Type,Treatment")
#
# Returns the result of DT[, mean(uptake), list(Plant, <groups>)]. The
# aggregate is left unnamed in j, so data.table chooses its column name.
#
# Every element is normalised on its own instead of inspecting only the
# first and last one. As a result an empty `list()` groups by Plant alone,
# a column literally named "c" is kept, and strings mixed with symbols are
# all turned into column names.
inner_fun <- function(DT, inby) {
  is_wrapper <- is.call(inby) && is.symbol(inby[[1L]]) &&
    as.character(inby[[1L]]) %in% c("list", ".", "c")

  if (is_wrapper) {
    # Drop only the call head; the arguments are the grouping terms.
    parts <- as.list(inby)[-1L]
  } else if (is.character(inby)) {
    parts <- as.list(inby)
  } else {
    # A bare symbol or a bare expression is a single grouping term.
    parts <- list(inby)
  }

  # Strings become column symbols (split on commas, surrounding blanks
  # removed, empty pieces ignored); symbols and calls are kept as they are.
  as_terms <- function(part) {
    if (!is.character(part)) {
      return(list(part))
    }
    pieces <- trimws(unlist(strsplit(part, ",", fixed = TRUE)))
    lapply(pieces[nzchar(pieces)], as.name)
  }
  groups <- unlist(lapply(parts, as_terms), recursive = FALSE)

  # Build list(Plant, <groups>) and splice it into the `[` call, so that
  # data.table sees an ordinary literal `by` expression. eval() runs in this
  # function's frame, which is where DT is found.
  BY <- as.call(c(as.symbol("list"), as.name("Plant"), groups))
  eval(bquote(DT[, mean(uptake), .(BY)]))
}


# Unquoted columns and expressions wrapped in list() or .().
outer_fun(CO2, outby = list(Type, Treatment))
outer_fun(CO2, outby = list(Type))
outer_fun(CO2, outby = .(Type, Treatment))
# Grouping by a computed logical expression.
outer_fun(CO2, outby = list(Treatment == "chilled"))
# Grouping by binned `conc`. NOTE(review): cut() is called without
# include.lowest = TRUE, so rows equal to the smallest break get an NA bin.
outer_fun(CO2, outby = list(cut(conc, breaks = quantile(conc))))
# A single bare column name.
outer_fun(CO2, outby = Type)

# Column names given as strings: a single name, a character vector, or one
# comma-separated string.
outer_fun(CO2, outby = "Type")
outer_fun(CO2, outby = c("Type", "Treatment"))
outer_fun(CO2, outby = "Type,Treatment")
# Thin wrapper: hands DT and every additional argument on to inner_fun()
# through `...`. Nothing is captured or evaluated at this level, so an
# argument such as `by = .(Plant, Type)` reaches inner_fun() as written.
outer_fun <- function(DT, ...) {
    inner_fun(DT, ...)
}
# Computes mean(uptake) as a column named `mean`. The arguments in `...` are
# placed after j in the `[` call on DT, so a `by = ...` supplied by the caller
# is interpreted by `[` itself rather than by this function.
# NOTE(review): assumes DT is a data.table with an `uptake` column; the .()
# in j relies on data.table's `[` method.
inner_fun <- function(DT, ...) {
    DT[, .(mean = mean(uptake)), ...]
}
# Usage of the `...`-forwarding wrappers: `by` is written exactly as it would
# be inside DT[...], and the fixed grouping column `Plant` has to be listed
# explicitly by the caller.
outer_fun(CO2, by = .(Plant, Type, Treatment))
outer_fun(CO2, by = c("Plant", "Type", "Treatment"))
outer_fun(CO2, by = "Plant,Type,Treatment")
outer_fun(CO2, by = .(Plant, Treatment == "chilled"))
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
outer_fun(
  CO2,
  by = .(Plant, cut(conc, breaks = quantile(conc), include.lowest = TRUE))
)