R 使用代码范围转换data.table

R 使用代码范围转换data.table,r,data.table,R,Data.table,我有一个数据集,其中包含一系列代码和标题: library(data.table) dataset <- data.table( start = c("A00", "A20", "C10", "F00"), end = c("A09", "A35", "C19", "F15"), title = c("title1", "title2", "title3", "title4")) #> start end title #> 1: A00 A

我有一个数据集,其中包含一系列代码和标题:

library(data.table)
# Example input: each row describes a code range (start..end) and its title.
dataset <- data.table(
  start = c("A00", "A20", "C10", "F00"),
  end   = c("A09", "A35", "C19", "F15"),
  title = c("title1", "title2", "title3", "title4")
)

#>    start end  title
#> 1:   A00 A09 title1
#> 2:   A20 A35 title2
#> 3:   C10 C19 title3
#> 4:   F00 F15 title4
我目前的解决方案是:

#' Expand a code range into every code it contains
#'
#' Codes are expected to be one letter followed by digits in positions 2-3
#' (e.g. "A00"). The letter of `start` is reused for every generated code.
#'
#' @param start First code of the range, e.g. "A00".
#' @param end Last code of the range, e.g. "A09". Must carry the same letter
#'   as `start`.
#' @return A character vector with one zero-padded code per number between
#'   `start` and `end`, inclusive.
seq_code <- function(start, end) {
  letter <- substr(start, 1, 1)
  # A range such as "A00".."B05" cannot be expressed with a single prefix;
  # fail loudly instead of silently emitting codes under the start letter.
  if (!isTRUE(all(letter == substr(end, 1, 1)))) {
    stop("`start` and `end` must share the same letter prefix", call. = FALSE)
  }

  start_digits <- substr(start, 2, 3)
  end_digits <- substr(end, 2, 3)
  if (!all(grepl("^[0-9]+$", c(start_digits, end_digits)))) {
    stop("`start` and `end` must end in a numeric suffix", call. = FALSE)
  }

  # Convert explicitly rather than relying on `:` coercing strings to numbers.
  first <- as.integer(start_digits)
  last <- as.integer(end_digits)
  paste0(letter, sprintf("%02d", first:last))
}

# Expand each row's code range separately, then stack the per-row tables.
# seq_len() replaces 1:nrow() so a zero-row `dataset` yields no iterations
# instead of iterating over c(1, 0).
rbindlist(lapply(seq_len(nrow(dataset)), function(i) {
  dataset[i, list(code = seq_code(start, end), title = title)]
}))
基准测试:

# Compare the row-by-row lapply() approach with a single grouped call.
# seq_len() replaces 1:nrow() so the lapply variant is safe on zero rows.
microbenchmark::microbenchmark(
  lapply = rbindlist(lapply(seq_len(nrow(dataset)), function(i) {
    dataset[i, list(code = seq_code(start, end), title = title)]
  })),
  by = dataset[, list(code = seq_code(start, end)), by = title]
)

#> Unit: microseconds
#> expr      min       lq      mean    median        uq      max neval cld
#> lapply 2024.874 2065.387 2166.9491 2085.2535 2149.1420 4979.722   100   b
#>     by  486.404  510.853  531.5532  519.6025  536.6735  821.413   100  a 
这样够好吗?(结果并不完全相同,因为列的顺序颠倒了;如果列顺序很重要,可以用 setcolorder 调整。)

dataset[,,

可能是重复问题。@Pascal - 我认为那些答案可以很容易地改编,但我不确定这是否完全重复。@thelatemail 不完全是,可能……算了,是不是重复只是语义问题——但我同意它绝对相关,值得链接。Michael 下面的答案几乎只是一个改编。
# One group per title: expand that group's start/end range into its codes.
dataset[, .(code = seq_code(start, end)), by = title]
# Compare the row-by-row lapply() approach with a single grouped call.
# seq_len() replaces 1:nrow() so the lapply variant is safe on zero rows.
microbenchmark::microbenchmark(
  lapply = rbindlist(lapply(seq_len(nrow(dataset)), function(i) {
    dataset[i, list(code = seq_code(start, end), title = title)]
  })),
  by = dataset[, list(code = seq_code(start, end)), by = title]
)

#> Unit: microseconds
#> expr      min       lq      mean    median        uq      max neval cld
#> lapply 2024.874 2065.387 2166.9491 2085.2535 2149.1420 4979.722   100   b
#>     by  486.404  510.853  531.5532  519.6025  536.6735  821.413   100  a 
# Per title: take the letter from `start`, parse the numeric parts of `start`
# and `end`, and emit one zero-padded code for every number in between.
dataset[, {
  prefix <- substr(start, 1, 1)
  first_num <- as.integer(substr(start, 2, 3))
  last_num <- as.integer(substr(end, 2, 3))
  .(code = paste0(prefix, sprintf("%02d", first_num:last_num)))
}, by = title]
# Per title: split every non-grouping column (here `start` and `end`) right
# after its leading letter, then build the codes from the pieces.
dataset[, {
  # After unlist() the pieces are ordered: start letter, start digits,
  # end letter, end digits.
  parts <- unlist(lapply(
    .SD, tstrsplit,
    split = "(?<=[[:alpha:]])", perl = TRUE
  ))
  bounds <- as.integer(parts[c(2, 4)])
  .(code = paste0(parts[1], sprintf("%02d", seq(bounds[1], bounds[2]))))
}, by = title]