Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/83.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Regex 将data.frame的colnames增加1_Regex_R_String_Dataframe_Increment - Fatal编程技术网

Regex 将data.frame的colnames增加1

Regex 将data.frame的colnames增加1,regex,r,string,dataframe,increment,Regex,R,String,Dataframe,Increment,具有带有colname的data.frame nam <- c("a", paste0("a_", seq(12))) "a" "a_1" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" 到目前为止,我的解决方案看起来非常复杂。。。有比这更简单的方法吗 increment_names <- function(nam){ where <- regexpr("\\d", nam) i

我有一个 data.frame,它的列名(colnames)如下,我想把每个列名中的数字加 1:

nam <- c("a", paste0("a_", seq(12)))
"a" "a_1" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12"
到目前为止,我的解决方案看起来非常复杂……有比这更简单的方法吗?

# Add one to the first number embedded in each name.
#
# Arguments:
#   nam  Character vector of names, e.g. c("a", "a_1", "a_12").
#
# Returns:
#   A character vector of the same length as `nam`. In each element the
#   first run of digits is replaced by that number plus one; elements
#   that contain no digits are returned unchanged.
increment_names <- function(nam) {
  if (length(nam) == 0L) {
    return(nam)
  }

  # Match the whole first run of digits, not just the first digit, so a
  # name such as "1_a" is parsed as 1 instead of the non-numeric "1_a".
  digit_run <- regexpr("\\d+", nam)
  found <- regmatches(nam, digit_run)
  if (length(found) == 0L) {
    return(nam)
  }

  # `substring<-` can never lengthen a string, so it would truncate
  # "a_9" + 1 to "a_1". `regmatches<-` rebuilds the string around the
  # match and therefore copes with a result that gains a digit.
  # sprintf() is used because as.character(1e5) yields "1e+05".
  regmatches(nam, digit_run) <- sprintf("%.0f", as.numeric(found) + 1)
  nam
}

> increment_names(nam)
 [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"

只要您的模式是“非数字”在前、数字在后,`increment_names` 就适用:


使用 `gsubfn` 包,您可以用很简单的方式做到:

# gsubfn() works like gsub() but accepts a function as the replacement.
library(gsubfn) 
# Each run of digits is passed to the function as a string; the function
# converts it to a number and adds one, and the result is substituted back.
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a"    "a_2"  "a_3"  "a_4"  "a_5"  "a_6"  "a_7"  "a_8"  "a_9"  "a_10" "a_11" "a_12" "a_13"
这将适用于任何模式,例如,您不需要假设上面提到的“非数字”模式

(nam <- c("a", paste0(seq(12), "_a")))
## [1] "a"    "1_a"  "2_a"  "3_a"  "4_a"  "5_a"  "6_a"  "7_a"  "8_a"  "9_a"  "10_a" "11_a" "12_a"
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a"    "2_a"  "3_a"  "4_a"  "5_a"  "6_a"  "7_a"  "8_a"  "9_a"  "10_a" "11_a" "12_a" "13_a"

Base R 的 `regmatches` 解决方案:

# regexpr() reports only the FIRST run of digits in each element
# (-1 for elements that contain none).
r <- regexpr("\\d+", nam)
# Replace each matched run with its numeric value plus one. Elements
# without a match are skipped by regmatches() and stay unchanged.
regmatches(nam, r) <- as.numeric(regmatches(nam, r)) + 1
nam
# [1] "a"    "a_2"  "a_3"  "a_4"  "a_5"  "a_6"  "a_7"  "a_8"  ...
您也可以尝试使用 `ore` 包;使用该包时,替换参数可以是一个函数,如下所示:

# Rebuild the example names: "a", "a_1", ..., "a_12".
nam <- c("a", paste0("a_", seq(12)))
nam
library(ore)
# The pattern allows an optional leading minus sign before the digits.
# The replacement is a function applied to each match; all = TRUE asks
# for every match in a string to be substituted, not just the first.
ore.subst("-?\\d+", function(x) as.numeric(x) + 1, nam, all = TRUE)
#  [1] "a"    "a_2"  "a_3"  "a_4"  "a_5"  "a_6"  "a_7"  "a_8"  "a_9" 
# [10] "a_10" "a_11" "a_12" "a_13"

(+1)不需要“非数字”模式。由于我的数据正好符合这个模式,我采用了 base 解决方案。
有趣的是,`gsubfn` 会把 NA 转换为 "",而不是像 `as.character` 那样转换为 "NA"。
+1,感谢提醒我还有 `gsubfn`。
@BrodieG,仅供参考:我本来正要发布与您完全相同的解决方案,但后来看到了您的编辑,意识到我们会发布相同的内容;也多亏如此,我才想出了这个“宝石” :)
我几天前就已经给这个回答投过票了,但我想您可能会对我最近遇到的一个包感兴趣:它的功能与您在这里所做的类似,但速度要快得多。请看(原链接缺失)。
@AnandaMahto 有趣(+1)。您对它做过基准测试吗?您知道是什么原因使它更高效吗?
太棒了,我从来没有想过使用这种替换形式。
(nam <- c("a", paste0(seq(12), "_a")))
## [1] "a"    "1_a"  "2_a"  "3_a"  "4_a"  "5_a"  "6_a"  "7_a"  "8_a"  "9_a"  "10_a" "11_a" "12_a"
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a"    "2_a"  "3_a"  "4_a"  "5_a"  "6_a"  "7_a"  "8_a"  "9_a"  "10_a" "11_a" "12_a" "13_a"
r <- regexpr("\\d+", nam)
regmatches(nam, r) <- as.numeric(regmatches(nam, r)) + 1
nam
# [1] "a"    "a_2"  "a_3"  "a_4"  "a_5"  "a_6"  "a_7"  "a_8"  ...
nam <- c("a", paste0("a_", seq(12)))
nam
library(ore)
ore.subst("-?\\d+", function(x) as.numeric(x) + 1, nam, all = TRUE)
#  [1] "a"    "a_2"  "a_3"  "a_4"  "a_5"  "a_6"  "a_7"  "a_8"  "a_9" 
# [10] "a_10" "a_11" "a_12" "a_13"
# Benchmark input: random test strings generated with stringi.
library(stringi)
# Fix the RNG seed so the generated strings are reproducible.
set.seed(1)
# 10000 random strings, each 5 characters long, drawn from A-J and 0-9.
nam <- stri_rand_strings(10000, 5, pattern = "[A-J0-9]")

# Benchmark candidate based on the ore package.
#
# Arguments:
#   invec  Character vector to process; defaults to the global `nam`.
#
# Returns:
#   The result of ore.subst(): every match of an optionally negative
#   integer is replaced by the value of that match plus one.
f_ORE <- function(invec = nam) {
  # Replacement callback: turn the matched text into a number and add one.
  add_one <- function(x) {
    as.numeric(x) + 1
  }
  ore.subst("-?\\d+", add_one, invec, all = TRUE)
}

# Benchmark candidate based on the gsubfn package.
#
# Arguments:
#   invec  Character vector to process; defaults to the global `nam`.
#
# Returns:
#   The result of gsubfn(): each run of digits is replaced by the value
#   of that run plus one.
f_GSUBFN <- function(invec = nam) {
  # Replacement callback: turn the matched digits into a number and add one.
  add_one <- function(x) {
    as.numeric(x) + 1
  }
  gsubfn("\\d+", add_one, invec)
}

# Benchmark candidate using base R only.
#
# Arguments:
#   invec  Character vector to process; defaults to the global `nam`.
#
# Returns:
#   A character vector of the same length as `invec` in which every run
#   of digits is replaced by that number plus one. Leading zeros are not
#   preserved ("019" becomes "20").
f_BASE <- function(invec = nam) {
  if (length(invec) == 0L) {
    return(invec)
  }

  # gregexpr() rather than regexpr(): regexpr() reports only the first
  # match per string, so "0G019" came back as "1G019" while the ore and
  # gsubfn variants, which replace every match, return "1G20".
  digit_runs <- gregexpr("\\d+", invec)

  # One replacement vector per input string. sprintf() keeps plain digits
  # where as.character() would switch to "1e+05" for 99999 + 1.
  regmatches(invec, digit_runs) <- lapply(
    regmatches(invec, digit_runs),
    function(digits) sprintf("%.0f", as.numeric(digits) + 1)
  )
  invec
}

system.time(f_GSUBFN())
#    user  system elapsed 
#    5.48    0.01    5.50 

library(microbenchmark)
microbenchmark(f_BASE(), f_ORE())
# Unit: milliseconds
#      expr       min        lq      mean    median        uq      max neval
#  f_BASE() 141.79743 149.58914 161.49041 152.81038 162.10550 357.6483   100
#   f_ORE()  57.35309  59.58433  65.84678  60.92218  68.40062 116.7714   100
> identical(f_ORE(), f_GSUBFN())
[1] TRUE

## Edge case...
> nam[988]
[1] "0G019"
> f_ORE()[988]     ## 019 becomes 20 (without the leading zero)
[1] "1G20"
> f_GSUBFN()[988]  ## Same
[1] "1G20"
> f_BASE()[988]    ## This seems off...
[1] "1G019"