dplyr不包括以特定数字开头的行_R_Dplyr

dplyr：排除以特定数字开头的行

dplyr不包括以特定数字开头的行,r,dplyr,R,Dplyr,我有这样的数据集 df <- data.frame(ID = c(334, 111, 324, 234), Name = c("Tom", "Mike", "John", "Tim"), Score = c(2, 9, 3, 5)) dflibrary（dplyr）图书馆（微基准） Nlibrary（dplyr）图书馆（微基准） N这是使用dplyr的方法： library(dplyr) df %>

我有这样的数据集

df <- data.frame(ID    = c(334, 111, 324, 234), 
                 Name  = c("Tom", "Mike", "John", "Tim"), 
                 Score = c(2, 9, 3, 5))

df
library(dplyr)
library(microbenchmark)
N
这是使用dplyr的方法：
library(dplyr)
df %>%
  filter(grepl("^[^3]", ID))

结果：
df <- data.frame(ID    = c(334, 111, 324, 234), 
                 Name  = c("Tom", "Mike", "John", "Tim"), 
                 Score = c(2, 9, 3, 5))

microbenchmark(f_grep(), f_grepl(), f_modul(), f_sWith(), f_subSt())

Unit: microseconds
      expr    min      lq      mean  median      uq       max neval
  f_grep() 42.207 47.0645  65.51158 58.0910 62.2905   865.607   100
 f_grepl() 35.762 40.5785  59.13411 49.6425 54.4015  1023.742   100
 f_modul() 27.659 32.4575 154.65156 41.5485 44.1945 10969.091   100
 f_sWith() 30.866 35.0830  93.27367 44.0320 47.3740  3642.091   100
 f_subSt() 33.470 37.8465  57.94782 47.1935 49.5860   991.518   100

df <- data.frame(ID    = sample(df$ID, N, replace = TRUE),
                 Name  = sample(df$Name, N, replace = TRUE),
                 Score = sample(df$Score, N, replace = TRUE))

microbenchmark(f_grep(), f_grepl(), f_modul(), f_sWith(), f_subSt())

Unit: milliseconds
      expr       min        lq      mean    median        uq       max neval
  f_grep() 472.19564 479.15768 492.12995 495.77323 503.16749 538.67349   100
 f_grepl() 478.68982 483.25584 496.40382 501.86222 507.34989 535.04327   100
 f_modul()  29.78637  30.74446  41.82639  32.61941  53.58474  62.51763   100
 f_sWith() 386.47298 388.99461 401.46679 398.01549 412.25743 435.97195   100
 f_subSt() 423.53511 426.11061 438.80629 442.81014 449.26856 471.70923   100

   ID Name Score
1 111 Mike     9
2 234  Tim     5

df = data.frame(ID = c(334, 111, 324, 234), 
                Name = c("Tom", "Mike", "John", "Tim"), 
                Score = c(2, 9, 3, 5))

数据：
df <- data.frame(ID    = c(334, 111, 324, 234), 
                 Name  = c("Tom", "Mike", "John", "Tim"), 
                 Score = c(2, 9, 3, 5))

microbenchmark(f_grep(), f_grepl(), f_modul(), f_sWith(), f_subSt())

Unit: microseconds
      expr    min      lq      mean  median      uq       max neval
  f_grep() 42.207 47.0645  65.51158 58.0910 62.2905   865.607   100
 f_grepl() 35.762 40.5785  59.13411 49.6425 54.4015  1023.742   100
 f_modul() 27.659 32.4575 154.65156 41.5485 44.1945 10969.091   100
 f_sWith() 30.866 35.0830  93.27367 44.0320 47.3740  3642.091   100
 f_subSt() 33.470 37.8465  57.94782 47.1935 49.5860   991.518   100

df <- data.frame(ID    = sample(df$ID, N, replace = TRUE),
                 Name  = sample(df$Name, N, replace = TRUE),
                 Score = sample(df$Score, N, replace = TRUE))

microbenchmark(f_grep(), f_grepl(), f_modul(), f_sWith(), f_subSt())

Unit: milliseconds
      expr       min        lq      mean    median        uq       max neval
  f_grep() 472.19564 479.15768 492.12995 495.77323 503.16749 538.67349   100
 f_grepl() 478.68982 483.25584 496.40382 501.86222 507.34989 535.04327   100
 f_modul()  29.78637  30.74446  41.82639  32.61941  53.58474  62.51763   100
 f_sWith() 386.47298 388.99461 401.46679 398.01549 412.25743 435.97195   100
 f_subSt() 423.53511 426.11061 438.80629 442.81014 449.26856 471.70923   100

   ID Name Score
1 111 Mike     9
2 234  Tim     5

df = data.frame(ID = c(334, 111, 324, 234), 
                Name = c("Tom", "Mike", "John", "Tim"), 
                Score = c(2, 9, 3, 5))

这就是使用dplyr的方法：
library(dplyr)
df %>%
  filter(grepl("^[^3]", ID))

结果：
df <- data.frame(ID    = c(334, 111, 324, 234), 
                 Name  = c("Tom", "Mike", "John", "Tim"), 
                 Score = c(2, 9, 3, 5))

microbenchmark(f_grep(), f_grepl(), f_modul(), f_sWith(), f_subSt())

Unit: microseconds
      expr    min      lq      mean  median      uq       max neval
  f_grep() 42.207 47.0645  65.51158 58.0910 62.2905   865.607   100
 f_grepl() 35.762 40.5785  59.13411 49.6425 54.4015  1023.742   100
 f_modul() 27.659 32.4575 154.65156 41.5485 44.1945 10969.091   100
 f_sWith() 30.866 35.0830  93.27367 44.0320 47.3740  3642.091   100
 f_subSt() 33.470 37.8465  57.94782 47.1935 49.5860   991.518   100

df <- data.frame(ID    = sample(df$ID, N, replace = TRUE),
                 Name  = sample(df$Name, N, replace = TRUE),
                 Score = sample(df$Score, N, replace = TRUE))

microbenchmark(f_grep(), f_grepl(), f_modul(), f_sWith(), f_subSt())

Unit: milliseconds
      expr       min        lq      mean    median        uq       max neval
  f_grep() 472.19564 479.15768 492.12995 495.77323 503.16749 538.67349   100
 f_grepl() 478.68982 483.25584 496.40382 501.86222 507.34989 535.04327   100
 f_modul()  29.78637  30.74446  41.82639  32.61941  53.58474  62.51763   100
 f_sWith() 386.47298 388.99461 401.46679 398.01549 412.25743 435.97195   100
 f_subSt() 423.53511 426.11061 438.80629 442.81014 449.26856 471.70923   100

   ID Name Score
1 111 Mike     9
2 234  Tim     5

df = data.frame(ID = c(334, 111, 324, 234), 
                Name = c("Tom", "Mike", "John", "Tim"), 
                Score = c(2, 9, 3, 5))

数据：
df <- data.frame(ID    = c(334, 111, 324, 234), 
                 Name  = c("Tom", "Mike", "John", "Tim"), 
                 Score = c(2, 9, 3, 5))

microbenchmark(f_grep(), f_grepl(), f_modul(), f_sWith(), f_subSt())

Unit: microseconds
      expr    min      lq      mean  median      uq       max neval
  f_grep() 42.207 47.0645  65.51158 58.0910 62.2905   865.607   100
 f_grepl() 35.762 40.5785  59.13411 49.6425 54.4015  1023.742   100
 f_modul() 27.659 32.4575 154.65156 41.5485 44.1945 10969.091   100
 f_sWith() 30.866 35.0830  93.27367 44.0320 47.3740  3642.091   100
 f_subSt() 33.470 37.8465  57.94782 47.1935 49.5860   991.518   100

df <- data.frame(ID    = sample(df$ID, N, replace = TRUE),
                 Name  = sample(df$Name, N, replace = TRUE),
                 Score = sample(df$Score, N, replace = TRUE))

microbenchmark(f_grep(), f_grepl(), f_modul(), f_sWith(), f_subSt())

Unit: milliseconds
      expr       min        lq      mean    median        uq       max neval
  f_grep() 472.19564 479.15768 492.12995 495.77323 503.16749 538.67349   100
 f_grepl() 478.68982 483.25584 496.40382 501.86222 507.34989 535.04327   100
 f_modul()  29.78637  30.74446  41.82639  32.61941  53.58474  62.51763   100
 f_sWith() 386.47298 388.99461 401.46679 398.01549 412.25743 435.97195   100
 f_subSt() 423.53511 426.11061 438.80629 442.81014 449.26856 471.70923   100

   ID Name Score
1 111 Mike     9
2 234  Tim     5

df = data.frame(ID = c(334, 111, 324, 234), 
                Name = c("Tom", "Mike", "John", "Tim"), 
                Score = c(2, 9, 3, 5))

library(dplyr)
filter(df, substr(ID, 1, 1) != 3)
为什么需要dplyr？df[grep("^3", df$ID, invert = TRUE), ]
使用grepl会更安全。请在编写示例时使用有效代码。这里，c(Tom, et al)会失败，因为您漏掉了引号。
@PoGibas 这并没有“排除”以3开头的ID。
library(dplyr)
filter(df, substr(ID, 1, 1) != 3)
为什么需要dplyr？df[grep("^3", df$ID, invert = TRUE), ]
使用grepl会更安全。请在编写示例时使用有效代码。这里，c(Tom, et al)会失败，因为您漏掉了引号。
@PoGibas 这并没有“排除”以3开头的ID。
这实际上并没有说明如何在dplyr工作流中具体实现。OP也从未问过性能，因此展示基准测试有点多余。
不知何故，我漏看了要“排除”以3开头的ID。已编辑更正。