data.table fread（）-跳过文档的第一部分_R_Data.table_Fread

data.table fread（）-跳过文档的第一部分

data.table fread（）-跳过文档的第一部分,r,data.table,fread,R,Data.table,Fread,你好，我有几个文本文件，它们的布局与我想用fread（）函数读入的相同。（请参见此处的两个示例文件：）文件分为两部分，第一部分包含16列，第二部分包含7列。我只需要第一部分的数据，只需要第1列和第2列 dat10 <- fread("CalcV10.txt", select = c(1,2), verbose=TRUE, col.names = c("Net", "Nrp")) > head(dat10) Net Nrp 1: 225 1 2: 247 1 3: 26

你好，我有几个布局相同的文本文件，想用 fread() 函数读入。（请参见此处的两个示例文件。）每个文件分为两部分：第一部分包含 16 列，第二部分包含 7 列。我只需要第一部分的数据，并且只需要其中的第 1 列和第 2 列。

 # Read the example file, keeping only columns 1 and 2 and naming them
 # Net and Nrp; verbose = TRUE asks fread() to report what it is doing.
 dat10 <- fread(
   "CalcV10.txt",
   select = c(1, 2),
   verbose = TRUE,
   col.names = c("Net", "Nrp")
 )

> head(dat10)
Net Nrp
1: 225   1
2: 247   1
3: 268   1
4: 287   1
5: 301  12
6: 302   4

我尝试过更改列类型（colClasses = list(character = 1:16)），但没有用。任何一点提示我都非常感激。

祝好，雅库林

我使用的是 data.table 1.10.4、R 3.3.2 和 RStudio 1.0.136（这些都是在两周前更新的）。

编辑

我有 40 个名称和布局都相同的文件（Calc.txt）。它们位于 20 个名为 V1–V20 的文件夹中，每个文件夹下有两个子文件夹，分别以两种模拟类型（sim type）命名。为了读入这些文本文件，我编写了以下函数：

   # Read the first two columns of every Calc.txt found under
   # dataDir/V<i>/<sim_type>, for i = 1 ... NrV.
   #
   # NrV      - number of version folders (V1 ... V<NrV>) to visit.
   # sim_type - name of the sub-folder inside each version folder.
   # FT       - label written to the `type` column of every row read.
   #
   # Returns a data.frame with the columns Net, Nrp and type. `dataDir` is not
   # an argument; it is looked up in the enclosing environment.
   read.res <- function(NrV, sim_type, FT) {
     # Build full paths instead of calling setwd(): the caller's working
     # directory is left untouched, and a relative dataDir keeps resolving
     # correctly on every iteration. sprintf() + seq_len() yield zero paths
     # (rather than "V1", "V0") when NrV is 0.
     version_dirs <- sprintf("V%d", seq_len(NrV))
     paths <- file.path(dataDir, version_dirs, sim_type, "Calc.txt")

     # Collect the pieces in a list and bind them once, rather than growing a
     # data.frame with rbind() inside a loop.
     parts <- lapply(paths, function(path) {
       dat0 <- fread(path, select = c(1, 2), col.names = c("Net", "Nrp"))
       dat0$type <- FT
       dat0
     })

     as.data.frame(rbindlist(parts))
   }

  # Read all 20 version folders once per simulation sub-folder, labelling the
  # rows "F" and "nF" respectively, then stack both results into one table.
  Forest <- read.res(20, sim_F, "F")
  nonForest <- read.res(20, sim_nF, "nF")
  data <- rbind(Forest, nonForest)

read.res

我还没有看到你的数据——也许你应该设置 header=FALSE。

谢谢你的建议，waterling，我已经试过了，但没有用。

非常感谢你抽出时间！如果我一次只读取一个文件，这个方法非常有效。但由于我必须读取 40 个文件，在把你的建议整合进我的函数时遇到了一些问题（请参见“编辑”部分），这与文件和目录的处理有关。

哇，太棒了！我的问题还没有完全解决，但我想剩下的部分我可以自己搞定。你的回答让我前进了一大步。非常感谢，你让我一整天都很开心。
   # Read the first two columns of every Calc.txt found under
   # dataDir/V<i>/<sim_type>, for i = 1 ... NrV.
   #
   # NrV      - number of version folders (V1 ... V<NrV>) to visit.
   # sim_type - name of the sub-folder inside each version folder.
   # FT       - label written to the `type` column of every row read.
   #
   # Returns a data.frame with the columns Net, Nrp and type. `dataDir` is not
   # an argument; it is looked up in the enclosing environment.
   read.res <- function(NrV, sim_type, FT) {
     # Build full paths instead of calling setwd(): the caller's working
     # directory is left untouched, and a relative dataDir keeps resolving
     # correctly on every iteration. sprintf() + seq_len() yield zero paths
     # (rather than "V1", "V0") when NrV is 0.
     version_dirs <- sprintf("V%d", seq_len(NrV))
     paths <- file.path(dataDir, version_dirs, sim_type, "Calc.txt")

     # Collect the pieces in a list and bind them once, rather than growing a
     # data.frame with rbind() inside a loop.
     parts <- lapply(paths, function(path) {
       dat0 <- fread(path, select = c(1, 2), col.names = c("Net", "Nrp"))
       dat0$type <- FT
       dat0
     })

     as.data.frame(rbindlist(parts))
   }

  # Read all 20 version folders once per simulation sub-folder, labelling the
  # rows "F" and "nF" respectively, then stack both results into one table.
  Forest <- read.res(20, sim_F, "F")
  nonForest <- read.res(20, sim_nF, "nF")
  data <- rbind(Forest, nonForest)

library('data.table')
fn1 <- "CalcV3.txt"
fn2 <- "CalcV10.txt"

# Number of rows to read from the first part of `path`: the line number of the
# first line containing "Sim_data", minus 5. Stops with an error when the
# marker is absent, because a zero-length `nrows` cannot be passed to fread().
first_part_rows <- function(path) {
  marker <- grep("Sim_data", readLines(path), fixed = TRUE)
  if (length(marker) == 0L) {
    stop("No 'Sim_data' line found in ", path, call. = FALSE)
  }
  marker[1L] - 5
}

# Read the first part of `path`, skipping the first line of the file.
# NOTE(review): sep = "\t" presumably keeps each whole line in the single
# column V1 (the files appear to be space-separated) - confirm on real data.
read_first_part <- function(path, nrows) {
  fread(path, nrows = nrows, header = FALSE, skip = 1, sep = "\t",
        strip.white = FALSE, stringsAsFactors = FALSE)
}

n1 <- first_part_rows(fn1)
x1 <- read_first_part(fn1, n1)

n2 <- first_part_rows(fn2)
x2 <- read_first_part(fn2, n2)

# Parse space-separated numeric lines into a numeric matrix.
#
# x    - character vector, one element per line of text.
# from - index of the first field to keep on each line.
# to   - index of the last field to keep on each line.
#
# Returns a numeric matrix with one row per element of `x` (row names V1,
# V2, ...) and one unnamed column per kept field. Fields missing from a short
# line are NA.
my_func <- function(x, from, to) {
  keep <- from:to

  # A plain " " replaces the undocumented '\ ' escape. Splitting on single
  # spaces leaves empty tokens for runs of spaces; they are dropped below.
  tokens <- strsplit(x, " ", fixed = TRUE)
  rows <- lapply(tokens, function(z) as.numeric(z[z != ""])[keep])

  if (length(rows) == 0L) {
    # No input lines: still return a matrix with the expected column count so
    # that callers can assign column names to it.
    return(matrix(numeric(0), nrow = 0L, ncol = length(keep)))
  }

  # Fill row by row so that each input line becomes one matrix row.
  out <- matrix(unlist(rows), nrow = length(rows), byrow = TRUE)
  rownames(out) <- paste0("V", seq_along(rows))
  out
}

# Parse every line held in x1$V1 and keep fields 1 through 16.
my_func(x1$V1, 1, 16)
#    [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
# V1 1000 2100    7   10   11   12  0.9  1.9    2   2.2  12.3  14.8  17.1  42.1 -52.1 -40.1

# Parse every line held in x1$V1 but keep only fields 2 through 4.
my_func(x1$V1, 2, 4)
#    [,1] [,2] [,3]
# V1 2100    7   10

# Parse every line held in x2$V1 and keep fields 1 through 16.
my_func(x2$V1, 1, 16)
#     [,1] [,2]  [,3]  [,4]  [,5]  [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
# V1   225    1 773.1 773.1 773.1 773.1  0.5  0.5  0.5   0.5  21.5  21.5  34.7  34.7 -42.5 -42.5
# V2   247    1 833.5 833.5 833.5 833.5  0.6  0.6  0.6   0.6  20.2  20.2  40.9  40.9 -15.4 -15.4
# V3   268    1 704.4 704.4 704.4 704.4  1.8  1.8  1.8   1.8  20.6  20.6  32.8  32.8 -42.9 -42.9
# V4   287    1 325.1 325.1 325.1 325.1  0.9  0.9  0.9   0.9  14.0  14.0  25.0  25.0 -42.1 -42.1
# V5   301   12 170.8 325.8 437.8 437.8  0.5  0.8  5.9   5.9   9.8  16.3  17.2  27.2 -32.2 -20.2
# V6   302    4  85.0 218.0 218.0 218.0  0.5  0.5  0.5   0.5   6.8  14.9   8.1  15.1 -38.4 -34.4
# V7   303    3  70.5  85.5  85.5  85.5  0.5  0.5  0.5   0.5   6.2   6.4  11.4  12.4 -26.9 -17.9
# V8   316   56 499.1 689.1 728.1 772.1  0.6  1.3  1.8   1.9  15.9  20.9  28.9  36.9 -38.6 -31.6
# V9   317  772 367.5 569.5 618.5 705.5  0.5  0.7  0.9   1.0  13.7  17.9  27.3  35.3 -26.6 -14.6
# V10  318   52 304.2 445.2 511.2 615.2  0.6  1.3  1.8   2.0  12.5  17.8  23.5  34.5 -21.6   0.4
# V11  319    4 412.3 527.3 527.3 527.3  0.6  0.7  0.7   0.7  15.1  20.9  21.9  33.9 -25.8  -4.8
# V12  330   14 107.7 264.7 421.7 421.7  0.5  0.8  1.3   1.3   8.2  14.4  14.7  27.7 -45.7 -27.7
# V13  331  872 229.3 406.3 468.3 531.3  0.5  1.0  1.5   2.3  11.7  17.1  19.2  28.2 -47.5 -37.5
# V14  332   35 428.1 690.1 728.1 774.1  1.1  3.2  4.1   4.8  17.0  22.6  22.6  35.6 -51.3 -35.3
# V15  333    4 452.0 523.0 523.0 523.0  0.7  1.0  1.0   1.0  15.8  17.1  28.5  29.5 -45.9 -38.9
# V16 1000 2100 143.6 200.6 215.6 232.6  1.2  2.1  2.3   2.4  12.4  14.8   8.1  17.1 -52.1 -41.1

# Parse space-separated numeric lines into a numeric matrix.
#
# x    - character vector, one element per line of text.
# from - index of the first field to keep on each line.
# to   - index of the last field to keep on each line.
#
# Returns a numeric matrix with one row per element of `x` (row names V1,
# V2, ...) and one unnamed column per kept field. Fields missing from a short
# line are NA.
my_func <- function(x, from, to) {
  keep <- from:to

  # A plain " " replaces the undocumented '\ ' escape. Splitting on single
  # spaces leaves empty tokens for runs of spaces; they are dropped below.
  tokens <- strsplit(x, " ", fixed = TRUE)
  rows <- lapply(tokens, function(z) as.numeric(z[z != ""])[keep])

  if (length(rows) == 0L) {
    # No input lines: still return a matrix with the expected column count so
    # that callers can assign column names to it.
    return(matrix(numeric(0), nrow = 0L, ncol = length(keep)))
  }

  # Fill row by row so that each input line becomes one matrix row.
  out <- matrix(unlist(rows), nrow = length(rows), byrow = TRUE)
  rownames(out) <- paste0("V", seq_along(rows))
  out
}

# Load the package once, up front: library() raises an error when it is
# missing, whereas require() inside the loop only returned FALSE.
library(data.table)

root_path <- "temp"   # Set `root_path` to the desired location

# Every <root>/V<i>/sim_types<j> directory, ordered V1/sim_types1,
# V1/sim_types2, V2/sim_types1, ...
version_dirs <- file.path(root_path, paste0("V", 1:20))
fdirs <- unlist(lapply(version_dirs, function(x) {
  file.path(x, paste0("sim_types", 1:2))
}))

all_dfs <- list()  # one Net/Nrp matrix per file, keyed by the file path
for (i in fdirs) {
  fn <- file.path(i, "Calc.txt")

  if (!file.exists(fn)) {
    warning("The file ", fn, " does not exist!")
    next
  }

  # Line number of the first "Sim_data" line. Without this guard a file that
  # lacks the marker would hand a zero-length `nrows` to fread().
  marker <- grep("Sim_data", readLines(fn), fixed = TRUE)
  if (length(marker) == 0L) {
    warning("The file ", fn, " has no 'Sim_data' line and was skipped!")
    next
  }

  n1 <- marker[1L] - 5  # rows belonging to the first part of the file
  x1 <- fread(fn, nrows = n1, header = FALSE, skip = 1, sep = "\t",
              strip.white = FALSE, stringsAsFactors = FALSE)
  df <- my_func(x1$V1, 1, 2)  # keep only the first two fields of each line
  colnames(df) <- c("Net", "Nrp")
  all_dfs[[fn]] <- df
}

warnings()
# 38: The file temp/V20/sim_types2/Calc.txt does not exist!

all_dfs
# $`temp/V1/sim_types1/Calc.txt`
# Net  Nrp
# V1 1000 2100
# 
# $`temp/V1/sim_types2/Calc.txt`
# Net  Nrp
# V1 1000 2100
# 
# $`temp/V2/sim_types1/Calc.txt`
# Net  Nrp
# V1 1000 2100
# 
# $`temp/V2/sim_types2/Calc.txt`
# Net  Nrp
# V1 1000 2100

# Reproducible example: create temp/V1 ... temp/V20, give each of them the
# sub-directories sim_types1 and sim_types2, and put an empty Calc.txt into
# every sub-directory.
root_path <- "temp"

dirs <- file.path(root_path, paste0("V", 1:20))

for (fpath in dirs) {
  dir.create(fpath, recursive = TRUE)

  sub_dirs <- file.path(fpath, paste0("sim_types", 1:2))
  for (sfpath in sub_dirs) {
    dir.create(sfpath, recursive = TRUE)
    file.create(file.path(sfpath, "Calc.txt"))
  }
}