使用str_replace_all（）更改data.frame（）中的多个列名（）_R_Pattern Matching_Rename_Stringr

使用str_replace_all（）更改data.frame（）中的多个列名（）

使用str_replace_all（）更改data.frame（）中的多个列名（）,r,pattern-matching,rename,stringr,R,Pattern Matching,Rename,Stringr,我读了这篇文章并练习了匹配模式，但我仍然没有弄清楚我有一个同样尺寸的面板，每年数次。现在，我想以合乎逻辑的方式重命名它们。我的原始数据看起来有点像这样 set.seed(667) dta <- data.frame(id = 1:6, R1213 = runif(6), R1224 = runif(6, 1, 2), R1255 = runif(6, 2, 3),

我读了这篇文章并练习了匹配模式，但我仍然没有弄清楚

我有一个同样尺寸的面板，每年数次。现在，我想以合乎逻辑的方式重命名它们。我的原始数据看起来有点像这样

set.seed(667)
dta <- data.frame(id = 1:6,
                  R1213 =  runif(6), 
                  R1224 = runif(6, 1, 2),
                  R1255 = runif(6, 2, 3),
                  R1235 = runif(6, 3, 4))

# install.packages(c("tidyverse"), dependencies = TRUE)
require(tidyverse)
(tbl <- dta %>% as_tibble())
#> # A tibble: 6 x 5
#>      id R1213 R1224 R1255 R1235
#>   <int> <dbl> <dbl> <dbl> <dbl>
#> 1     1 0.488  1.60  2.07  3.07
#> 2     2 0.692  1.42  2.76  3.19
#> 3     3 0.262  1.34  2.33  3.82
#> 4     4 0.330  1.77  2.61  3.93
#> 5     5 0.582  1.92  2.15  3.86
#> 6     6 0.930  1.88  2.56  3.59

每个调用

实际上都来自同一年，比如2017年，但是需要附加后缀

.1

，

.2

，等等。我一遍又一遍地使用

paste0（'A.2017'，1:3）

，但这一次有三个就足够了

tbl <- dta %>% as_tibble()
names(tbl) <- tbl %>% names() %>% 
               str_replace_all('^R1.[125].$', paste0('A.2017.', 1:3)) %>% 
               str_replace_all('^R1.[7].$', paste0('A.2018.', 1))
tbl
#> Warning message:
#> In stri_replace_all_regex(string, pattern, fix_replacement(replacement),  :
#>   longer object length is not a multiple of shorter object length
#> > tbl
#> # A tibble: 6 x 5
#>      id A.2017.2 A.2017.3 A.2017.1 R1235
#>   <int>    <dbl>    <dbl>    <dbl> <dbl>
#> 1     1    0.488     1.60     2.07  3.07
#> 2     2    0.692     1.42     2.76  3.19
#> 3     3    0.262     1.34     2.33  3.82
#> 4     4    0.330     1.77     2.61  3.93
#> 5     5    0.582     1.92     2.15  3.86
#> 6     6    0.930     1.88     2.56  3.59

tbl%为不兼容（）
名称（待定）%names（）%>%
str_replace_all（“^R1[125].$”，paste0（'A.2017'，1:3））%>%
str_replace_all（“^R1[7].$”，粘贴0（'A.2018'，1））
tbl
#>警告信息：
#>在stri_replace_all_regex（字符串、模式、修复_replacement（replacement））中：
#>较长的对象长度不是较短对象长度的倍数
#>>待定
#>#tibble:6 x 5
#>id A.2017.2 A.2017.3 A.2017.1 R1235
#>                
#> 1     1    0.488     1.60     2.07  3.07
#> 2     2    0.692     1.42     2.76  3.19
#> 3     3    0.262     1.34     2.33  3.82
#> 4     4    0.330     1.77     2.61  3.93
#> 5     5    0.582     1.92     2.15  3.86
#> 6     6    0.930     1.88     2.56  3.59

这确实出现了，但顺序颠倒了，我被告知较长的对象长度不是较短对象长度的倍数，但我不是

正确的长度吗？我希望以一种更干净、更简单的方式来做这件事。而且，我真的不喜欢

名称（tbl）基于David的建议-使用dplyr:：rename_at
进行如下操作怎么样
library(dplyr)

## Get data
set.seed(667)
dta <- data.frame(id = 1:6,
                  R1213 =  runif(6), 
                  R1224 = runif(6, 1, 2),
                  R1255 = runif(6, 2, 3),
                  R1235 = runif(6, 3, 4)) %>% 
  as_tibble()


## Rename
dta <- dta %>% 
  rename_at(.vars = grep('^R1.[125].$', names(.)), 
            .funs = ~paste0("A.2017.", 1:length(.)))

dta            
#> # A tibble: 6 x 5
#>      id A.2017.1 A.2017.2 A.2017.3 R1235
#>   <int>    <dbl>    <dbl>    <dbl> <dbl>
#> 1     1    0.196     1.74     2.51  3.49
#> 2     2    0.478     1.85     2.06  3.69
#> 3     3    0.780     1.32     2.21  3.26
#> 4     4    0.705     1.49     2.49  3.33
#> 5     5    0.942     1.59     2.66  3.58
#> 6     6    0.906     1.90     2.87  3.93

朝着整洁的数据方向发展
现在变量已经被重命名，您可能会发现以整洁的格式保存数据很有用
library(tidyr)
library(dplyr)

#Use tidy dataframe gather all variables, split by "." and drop A column (or keep if a measurement id)
renamed_dta %>% 
  gather(key = "measure", value = "value", -id) %>% 
  separate(measure, c("A", "year", "measure"), "[[.]]") %>% 
  select(-A)
#> # A tibble: 24 x 4
#>       id year  measure value
#>    <int> <chr> <chr>   <dbl>
#>  1     1 2017  1       0.196
#>  2     2 2017  1       0.478
#>  3     3 2017  1       0.780
#>  4     4 2017  1       0.705
#>  5     5 2017  1       0.942
#>  6     6 2017  1       0.906
#>  7     1 2017  2       1.74 
#>  8     2 2017  2       1.85 
#>  9     3 2017  2       1.32 
#> 10     4 2017  2       1.49 
#> # ... with 14 more rows

library（tidyr）
图书馆（dplyr）
#使用tidy dataframe收集所有变量，按“.”拆分并删除一列（如果是度量id，则保留）
已重命名\u dta%>%
聚集（key=“measure”、value=“value”、-id）%>%
单独（计量单位，c（“A”、“年”、“计量单位”），“[.]]”%>%
选择（-A）
#>#tibble:24 x 4
#>id年份度量值
#>         
#>  1     1 2017  1       0.196
#>  2     2 2017  1       0.478
#>  3     3 2017  1       0.780
#>  4     4 2017  1       0.705
#>  5     5 2017  1       0.942
#>  6     6 2017  1       0.906
#>  7     1 2017  2       1.74 
#>  8     2 2017  2       1.85 
#>  9     3 2017  2       1.32 
#> 10     4 2017  2       1.49 
#>#…还有14行
你试过在

处重命名吗？这是一个很好的答案。非常感谢！你的函数对我特别有用。谢谢。太好了，很乐意帮忙。你能接受它作为答案吗？

library(dplyr)
library(purrr)

## Get data
set.seed(667)
dta <- data.frame(id = 1:6,
                  R1213 =  runif(6), 
                  R1224 = runif(6, 1, 2),
                  R1255 = runif(6, 2, 3),
                  R1235 = runif(6, 3, 4)) %>% 
  as_tibble()

## Define a function to keep a hold out data set, then rename iteratively for each pattern and replacement.
rename_multiple_years <- function(df, patterns, 
                                  replacements,
                                  hold_out_var = "id") {

  hold_out_df <- df %>% 
    select_at(.vars = hold_out_var)

  rename_df <- map2_dfc(patterns, replacements, function(pattern, replacement) {
    df %>% 
      rename_at(.vars = grep(pattern, names(.)), 
                .funs = ~paste0(replacement, 1:length(.))) %>% 
      select_at(.vars = grep(replacement, names(.)))
  })

  final_df <- bind_cols(hold_out_df, rename_df)

  return(final_df)
}

## Call function on specified patterns and replacements
renamed_dta <- dta %>% 
  rename_multiple_years(patterns = c("^R1.[125].$", "^R1.[3].$"),
                        replacements = c("A.2017.", "A.2018."))
renamed_dta
#> # A tibble: 6 x 5
#>      id A.2017.1 A.2017.2 A.2017.3 A.2018.1
#>   <int>    <dbl>    <dbl>    <dbl>    <dbl>
#> 1     1    0.196     1.74     2.51     3.49
#> 2     2    0.478     1.85     2.06     3.69
#> 3     3    0.780     1.32     2.21     3.26
#> 4     4    0.705     1.49     2.49     3.33
#> 5     5    0.942     1.59     2.66     3.58
#> 6     6    0.906     1.90     2.87     3.93

library(tidyr)
library(dplyr)

#Use tidy dataframe gather all variables, split by "." and drop A column (or keep if a measurement id)
renamed_dta %>% 
  gather(key = "measure", value = "value", -id) %>% 
  separate(measure, c("A", "year", "measure"), "[[.]]") %>% 
  select(-A)
#> # A tibble: 24 x 4
#>       id year  measure value
#>    <int> <chr> <chr>   <dbl>
#>  1     1 2017  1       0.196
#>  2     2 2017  1       0.478
#>  3     3 2017  1       0.780
#>  4     4 2017  1       0.705
#>  5     5 2017  1       0.942
#>  6     6 2017  1       0.906
#>  7     1 2017  2       1.74 
#>  8     2 2017  2       1.85 
#>  9     3 2017  2       1.32 
#> 10     4 2017  2       1.49 
#> # ... with 14 more rows