Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/72.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
更改自定义dplyr函数中结果变量的名称 背景_R_Function_Dplyr_Lazy Evaluation_Summary - Fatal编程技术网

更改自定义dplyr函数中结果变量的名称 背景

更改自定义dplyr函数中结果变量的名称 背景,r,function,dplyr,lazy-evaluation,summary,R,Function,Dplyr,Lazy Evaluation,Summary,为了加快跨多个表生成分组摘要的速度;因为我在工作流中做了大部分工作,所以我起草了一个简单的函数来生成所需的度量 # Function to generate summary table generate_summary_tbl <- function(dataset, group_column, summary_column) { group_column <- enquo(group_column) summary_column <- enquo(summ

为了加快跨多个表生成分组摘要的速度,并且因为我的大部分工作都是在 dplyr 工作流中完成的,所以我起草了一个简单的函数来生成所需的度量:

# Build a per-group summary table (mean and sum) for a single column.
#
# Arguments:
#   dataset        - data frame / tibble to summarise
#   group_column   - bare (unquoted) name of the column to group by
#   summary_column - bare (unquoted) name of the column to aggregate
#
# Returns an ungrouped table with one row per group and the columns
# <group column>, `mean` and `sum`.
generate_summary_tbl <- function(dataset, group_column, summary_column) {
    # Capture the bare column names so they can be unquoted with `!!` below
    grp_quo <- enquo(group_column)
    val_quo <- enquo(summary_column)

    grouped_tbl <- group_by(dataset, !!grp_quo)
    summary_tbl <- summarise(
        grouped_tbl,
        mean = mean(!!val_quo),
        sum  = sum(!!val_quo)
        # Other metrics that need to be generated frequently
    )

    ungroup(summary_tbl)
}
与期望结果的区别在于:期望的变量名(`mean` 等)带有 `_am` 后缀,依此类推。

丑陋的解决方案

我目前使用的这个不完整且丑陋的解决方案用到了 `setNames`:

# Build a per-group summary table (mean and sum) and optionally relabel its
# columns after the grouping column.
#
# Arguments:
#   dataset        - data frame / tibble to summarise
#   group_column   - bare (unquoted) name of the column to group by
#   summary_column - bare (unquoted) name of the column to aggregate
#   useColName     - when TRUE (default), rename the result columns using the
#                    deparsed grouping quosure
#
# Returns an ungrouped table with one row per group.
generate_summary_tbl <-
    function(dataset,
             group_column,
             summary_column,
             useColName = TRUE) {
        # Capture the bare column names so they can be unquoted with `!!`
        grp_quo <- enquo(group_column)
        val_quo <- enquo(summary_column)

        summary_tbl <- ungroup(
            summarise(
                group_by(dataset, !!grp_quo),
                mean = mean(!!val_quo),
                sum  = sum(!!val_quo)
            )
        )

        if (!useColName) {
            return(summary_tbl)
        }

        # Deparsing the quosure itself keeps its leading `~`, so the label is
        # e.g. "~am" rather than "am"; the " _" separator also leaves a space
        # in the metric names (e.g. "mean _~am").
        grp_label    <- deparse(grp_quo)
        metric_names <- names(summary_tbl)[seq(2, ncol(summary_tbl))]

        setNames(
            summary_tbl,
            c(grp_label, paste(metric_names, grp_label, sep = " _"))
        )
    }
生成汇总表的函数 生成\u摘要\u tbl% 解组->smryDta if(useColName){ 设置名称(smryDta, c(德帕斯)(替代)( 组列 )), 粘贴( 名称(smryDta)[2:长度(smryDta)],粘贴0(“u2;”),底面(替换)( 组列 ))) )))->smryDta } 返回(smryDta) }
例子 返回的列名几乎与所需的结果匹配。我想我可以使用一些正则表达式来达到预期的结果。然而,我认为应该有更有效的解决办法

# Example: summarise mpg by am and relabel the columns after the group column.
# The printed result is kept below as comments so the snippet remains valid R.
mtcars %>% 
    generate_summary_tbl(group_column = am, summary_column = mpg, useColName = TRUE)
# # A tibble: 2 x 3
#   `~am` `mean _~am` `sum _~am`
#   <dbl>       <dbl>      <dbl>
# 1     0    17.14737      325.8
# 2     1    24.39231      317.1
mtcars %>% 
    generate_summary_tbl(group_column = am, summary_column = mpg, useColName = TRUE)
# A tibble: 2 x 3
  `~am` `mean _~am` `sum _~am`
  <dbl>       <dbl>      <dbl>
1     0    17.14737      325.8
2     1    24.39231      317.1

理想情况下,如何更好地使用or来获得所需的列名?

也许可以使用 `rename_at` 来重命名:

library(tidyverse)

# Build a per-group summary table (mean and sum) for a single column and
# optionally prefix the metric columns with the grouping column's name.
#
# Arguments:
#   dataset        - data frame / tibble to summarise
#   group_column   - bare (unquoted) name of the column to group by
#   summary_column - bare (unquoted) name of the column to aggregate
#   useColname     - when TRUE, rename every non-group column to
#                    "<group>_<metric>" (e.g. am_mean); default FALSE
#
# Returns an ungrouped table with one row per group.
generate_summary_tbl <- function(dataset, group_column, summary_column, useColname = FALSE) {
    # Capture the bare column names so they can be unquoted with `!!` below
    grp_quo <- enquo(group_column)
    val_quo <- enquo(summary_column)

    summary_tbl <- ungroup(
        summarise(
            group_by(dataset, !!grp_quo),
            mean = mean(!!val_quo),
            sum  = sum(!!val_quo)
            # Other metrics that need to be generated frequently
        )
    )

    if (!useColname) {
        return(summary_tbl)
    }

    # Name of the grouping column as a string, e.g. "am"
    grp_name <- quo_name(grp_quo)

    # Rename everything except the grouping column itself
    rename_at(
        summary_tbl,
        vars(-one_of(grp_name)),
        ~paste(grp_name, .x, sep = "_")
    )
}

# Default call: metric columns keep the names given in summarise()
mtcars %>% generate_summary_tbl(am, mpg)
# # A tibble: 2 x 3
#      am     mean   sum
#   <dbl>    <dbl> <dbl>
# 1     0 17.14737 325.8
# 2     1 24.39231 317.1

# With useColname = TRUE the metric columns are prefixed with the group
# column's name. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
mtcars %>% generate_summary_tbl(am, mpg, useColname = TRUE)
#   # A tibble: 2 x 3
#      am  am_mean am_sum
#   <dbl>    <dbl>  <dbl>
# 1     0 17.14737  325.8
# 2     1 24.39231  317.1
库(tidyverse)
生成\u摘要\u tbl%
解组->smryDta
if(useColname)
smryDta%
将_重命名为(
变量(-quo_名称(组列))中的一个),
~paste(组名称(组列),.x,sep=“”)
)
返回(smryDta)
}
mtcars%>%生成摘要tbl(am、mpg)
##tibble:2 x 3
#平均和
#        
# 1     0 17.14737 325.8
# 2     1 24.39231 317.1
mtcars%>%生成摘要tbl(am、mpg、T)
##tibble:2 x 3
#我的意思是我的总数
#         
# 1     0 17.14737  325.8
# 2     1 24.39231  317.1

谢谢 @lukeA,我想 rename 可以做到这一点。我想我甚至可以按照建议,尝试把条件逻辑直接写进管道:`... %>% { if (Y) add(1) else . }`
# Example: summarise mpg by am and relabel the columns after the group column.
# The printed result is kept below as comments so the snippet remains valid R.
mtcars %>% 
    generate_summary_tbl(group_column = am, summary_column = mpg, useColName = TRUE)
# # A tibble: 2 x 3
#   `~am` `mean _~am` `sum _~am`
#   <dbl>       <dbl>      <dbl>
# 1     0    17.14737      325.8
# 2     1    24.39231      317.1
library(tidyverse)

# Build a per-group summary table (mean and sum) for a single column and
# optionally prefix the metric columns with the grouping column's name.
#
# Arguments:
#   dataset        - data frame / tibble to summarise
#   group_column   - bare (unquoted) name of the column to group by
#   summary_column - bare (unquoted) name of the column to aggregate
#   useColname     - when TRUE, rename every non-group column to
#                    "<group>_<metric>" (e.g. am_mean); default FALSE
#
# Returns an ungrouped table with one row per group.
generate_summary_tbl <- function(dataset, group_column, summary_column, useColname = FALSE) {
    # Capture the bare column names so they can be unquoted with `!!` below
    grp_quo <- enquo(group_column)
    val_quo <- enquo(summary_column)

    summary_tbl <- ungroup(
        summarise(
            group_by(dataset, !!grp_quo),
            mean = mean(!!val_quo),
            sum  = sum(!!val_quo)
            # Other metrics that need to be generated frequently
        )
    )

    if (!useColname) {
        return(summary_tbl)
    }

    # Name of the grouping column as a string, e.g. "am"
    grp_name <- quo_name(grp_quo)

    # Rename everything except the grouping column itself
    rename_at(
        summary_tbl,
        vars(-one_of(grp_name)),
        ~paste(grp_name, .x, sep = "_")
    )
}

# Default call: metric columns keep the names given in summarise()
mtcars %>% generate_summary_tbl(am, mpg)
# # A tibble: 2 x 3
#      am     mean   sum
#   <dbl>    <dbl> <dbl>
# 1     0 17.14737 325.8
# 2     1 24.39231 317.1

# With useColname = TRUE the metric columns are prefixed with the group
# column's name. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
mtcars %>% generate_summary_tbl(am, mpg, useColname = TRUE)
#   # A tibble: 2 x 3
#      am  am_mean am_sum
#   <dbl>    <dbl>  <dbl>
# 1     0 17.14737  325.8
# 2     1 24.39231  317.1