Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/regex/17.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
grep()和sub()以及正则表达式_R_Regex_String Substitution - Fatal编程技术网

grep()和sub()以及正则表达式

grep()和sub()以及正则表达式,r,regex,string-substitution,R,Regex,String Substitution,我想将我的data.frame中的变量名从例如“pmm_StartTimev4_E2_C19_1”更改为“pmm_StartTimev4_E2_C19”。因此,如果名称以下划线结尾,后跟任何数字,则该名称将被删除 但是我希望只有当变量名中有单词“Start”时才会发生这种情况 我有一段乱七八糟的代码不起作用。任何帮助都将不胜感激 # Current data frame: dfbefore <- data.frame(a=c("pmm_StartTimev4_E2_C19_1","

我想将我的
data.frame
中的变量名从例如“pmm_StartTimev4_E2_C19_1”更改为“pmm_StartTimev4_E2_C19”。因此,如果名称以下划线结尾,后跟任何数字,则该名称将被删除

但是我希望只有当变量名中有单词“Start”时才会发生这种情况

我有一段乱七八糟的代码不起作用。任何帮助都将不胜感激

# Current data frame:
dfbefore <- data.frame(
  a = c("pmm_StartTimev4_E2_C19_1", "pmm_StartTimev4_E2_E2_C1", "delivery_C1_C12"),
  b = c("pmm_StartTo_v4_E2_C19_2", "complete_E1_C12_1", "pmm_StartTo_v4_E2_C19")
)

# Desired data frame: the trailing "_<digits>" is dropped only from the
# values that contain "Start".
dfafter <- data.frame(
  a = c("pmm_StartTimev4_E2_C19", "pmm_StartTimev4_E2_E2_C1", "delivery_C1_C12"),
  b = c("pmm_StartTo_v4_E2_C19", "complete_E1_C12_1", "pmm_StartTo_v4_E2_C19")
)

# Current code (the asker's non-working attempt, kept verbatim):
# NOTE(review): this line does not parse as written - the pattern is missing
# its opening quote and the parentheses are unbalanced (four opened, three
# closed). It also refers to `df`, while the data frame defined above is
# named `dfbefore`.
sub((.*{1,}[0-9]*).*","",grep("Start",names(df),value = TRUE)
#当前数据帧:

我们可以使用
sub
来匹配这样的字符串：包含“Start”子字符串，并以下划线加一个或多个数字结尾。用捕获组保留结尾之前的部分，在替换中使用该捕获组的反向引用。由于有多个列，请使用
lapply
在列上循环，应用
sub
并将输出赋值回原始数据

# Start from a copy so `dfbefore` is left untouched; assigning through
# `out[]` replaces every column while keeping the data-frame structure.
out <- dfbefore
out[] <- lapply(dfbefore, function(column) {
  # Keep everything before a trailing "_<digits>", but only for values that
  # contain "_Start"; other values do not match and pass through unchanged.
  sub(pattern = "^(.*_Start.*)_\\d+$", replacement = "\\1", x = column)
})
out

# Convert the expected columns to character before comparing with `out`.
dfafter[] <- lapply(dfafter, as.character)
all.equal(out, dfafter, check.attributes = FALSE)
#[1] TRUE

使用
gsub()
这样做怎么样

stripcol

sub("_\\d+$", "", x)
每个字符串只有一个替换,因此
gsub
可以是
sub
# Strip a trailing "_<digits>" suffix from every element of `x` that contains
# "Start"; all other elements are returned unchanged.
#
# x: a vector (character, factor, ...); it is coerced with as.character().
# Returns a character vector of the same length as `x`.
stripcol <- function(x) {
  labels <- as.character(x)
  # The pattern is anchored at the end of the string, so it can match at most
  # once per element - a single substitution is all that is ever performed.
  sub(pattern = "(.*Start.*)_\\d+$", replacement = "\\1", x = labels)
}

# Copy the input first; filling the copy through `dfnew[]` keeps the column
# names and data-frame class while each column is replaced by its cleaned
# character version.
dfnew <- dfbefore
dfnew[] <- lapply(X = dfbefore, FUN = stripcol)
# Remove a trailing "_<digits>" suffix from the elements of `x` that contain
# the word "Start"; every other element is returned unchanged.
#
# x: a vector (character, factor, ...); it is coerced with as.character().
#    Works for a single value (as when called cell by cell) as well as for
#    whole vectors, including zero-length input and NA.
# Returns a character vector of the same length as `x`.
doit <- function(x) {
  x <- as.character(x)
  # grepl() yields FALSE for NA, so missing values are simply left alone.
  has_start <- grepl("Start", x, fixed = TRUE)
  # Anchor at the end of the string and accept one or more digits, so only
  # the final "_<digits>" suffix is dropped - not every "_<digit>" pair that
  # happens to occur inside the name.
  x[has_start] <- sub("_[0-9]+$", "", x[has_start])
  x
}


# Call `doit` on every cell (both margins); the result is a character matrix.
apply(X = dfbefore, MARGIN = c(1, 2), FUN = doit)
    a                          b                      
[1,] "pmm_StartTimev4_E2_C19"   "pmm_StartTo_v4_E2_C19"
[2,] "pmm_StartTimev4_E2_E2_C1" "complete_E1_C12_1"    
[3,] "delivery_C1_C12"          "pmm_StartTo_v4_E2_C19"