如何在R中限制输出的csv文件的大小(或行数)?

如何在R中限制输出的csv文件的大小(或行数)?,r,R,如果我的输出文件是16MB,并且我将限制设置为5MB,那么它应该生成4个文件:三个5MB的文件和一个1MB的文件。或者,也可以按文件的最大行数来限制。比如,一个有16k行的文件,如果限制为每个文件5k行,则应生成4个文件:三个5k行的文件和一个1k行的文件。 writeCSV_group <- function(x, nMB=NULL, nrow=NULL, stem=NULL, ...){ if(!is.null(nMB) & !is.null(nrow))stop("…

如果我的输出文件是16MB,并且我将限制设置为5MB,那么它应该生成4个文件:三个5MB的文件和一个1MB的文件。


或者,也可以按文件的最大行数来限制。比如,一个有16k行的文件,如果限制为每个文件5k行,则应生成4个文件:三个5k行的文件和一个1k行的文件。

#' Write a data frame to several CSV files, capped by size or by row count
#'
#' Splits `x` into consecutive row chunks and writes each chunk to
#' `<stem>_<i>.csv` with `write.csv()`. Exactly one of `nMB` or `nrow` must
#' be supplied.
#'
#' @param x A data frame.
#' @param nMB Approximate maximum size of each chunk in MB. The size is
#'   estimated from the in-memory `object.size()` of `x` spread evenly over
#'   its rows, not from the number of bytes that end up on disk.
#' @param nrow Maximum number of rows per file.
#' @param stem File name stem (may include a directory). When `NULL`, a
#'   random name is generated and the files are written relative to the
#'   working directory.
#' @param ... Further arguments passed on to `write.csv()`.
#' @return Invisibly, the character vector of file names that were written.
writeCSV_group <- function(x, nMB = NULL, nrow = NULL, stem = NULL, ...) {
  if (!is.null(nMB) && !is.null(nrow)) {
    stop("Only one of nMB or nrow can be specified.\n")
  }
  if (is.null(nMB) && is.null(nrow)) {
    stop("One of nMB or nrow must be specified.\n")
  }

  limit <- if (is.null(nMB)) nrow else nMB
  if (!is.numeric(limit) || length(limit) != 1L || is.na(limit) || limit <= 0) {
    stop("The nMB/nrow limit must be a single positive number.\n")
  }

  # The `nrow` argument shadows the function of the same name when used as a
  # plain symbol, so the function is always called with an explicit namespace.
  n_rows <- base::nrow(x)

  if (is.null(nMB)) {
    rows_per_file <- nrow
  } else {
    total_mb <- as.numeric(object.size(x)) / 1024^2
    rows_per_file <- nMB / (total_mb / n_rows)
  }
  # Round down so a chunk never exceeds the limit, but always make progress
  # with at least one row per file.
  rows_per_file <- max(1, floor(rows_per_file))

  # Derive the number of files from the rows actually assigned to each file,
  # so every file name has a matching chunk.
  n_groups <- max(1, ceiling(n_rows / rows_per_file))
  if (n_rows > 0) {
    group_id <- ceiling(seq_len(n_rows) / rows_per_file)
    chunks <- split(x, group_id)
  } else {
    # Nothing to split: still emit one (header-only) file.
    chunks <- list(x)
  }

  if (is.null(stem)) {
    stem <- basename(tempfile())
  }
  fn <- paste0(stem, "_", seq_len(n_groups), ".csv")

  for (i in seq_along(fn)) {
    write.csv(chunks[[i]], fn[i], ...)
  }
  cat("files written as:\n  ",
      paste(fn, collapse = "\n  "), sep = "")
  invisible(fn)
}


类似下面这样的方法应该可以奏效:

#' Write a data frame to several CSV files, capped by size or by row count
#'
#' Splits `x` into consecutive row chunks and writes each chunk to
#' `<stem>_<i>.csv` with `write.csv()`. Exactly one of `nMB` or `nrow` must
#' be supplied.
#'
#' @param x A data frame.
#' @param nMB Approximate maximum size of each chunk in MB. The size is
#'   estimated from the in-memory `object.size()` of `x` spread evenly over
#'   its rows, not from the number of bytes that end up on disk.
#' @param nrow Maximum number of rows per file.
#' @param stem File name stem (may include a directory). When `NULL`, a
#'   random name is generated and the files are written relative to the
#'   working directory.
#' @param ... Further arguments passed on to `write.csv()`.
#' @return Invisibly, the character vector of file names that were written.
writeCSV_group <- function(x, nMB = NULL, nrow = NULL, stem = NULL, ...) {
  if (!is.null(nMB) && !is.null(nrow)) {
    stop("Only one of nMB or nrow can be specified.\n")
  }
  if (is.null(nMB) && is.null(nrow)) {
    stop("One of nMB or nrow must be specified.\n")
  }

  limit <- if (is.null(nMB)) nrow else nMB
  if (!is.numeric(limit) || length(limit) != 1L || is.na(limit) || limit <= 0) {
    stop("The nMB/nrow limit must be a single positive number.\n")
  }

  # The `nrow` argument shadows the function of the same name when used as a
  # plain symbol, so the function is always called with an explicit namespace.
  n_rows <- base::nrow(x)

  if (is.null(nMB)) {
    rows_per_file <- nrow
  } else {
    total_mb <- as.numeric(object.size(x)) / 1024^2
    rows_per_file <- nMB / (total_mb / n_rows)
  }
  # Round down so a chunk never exceeds the limit, but always make progress
  # with at least one row per file.
  rows_per_file <- max(1, floor(rows_per_file))

  # Derive the number of files from the rows actually assigned to each file,
  # so every file name has a matching chunk.
  n_groups <- max(1, ceiling(n_rows / rows_per_file))
  if (n_rows > 0) {
    group_id <- ceiling(seq_len(n_rows) / rows_per_file)
    chunks <- split(x, group_id)
  } else {
    # Nothing to split: still emit one (header-only) file.
    chunks <- list(x)
  }

  if (is.null(stem)) {
    stem <- basename(tempfile())
  }
  fn <- paste0(stem, "_", seq_len(n_groups), ".csv")

  for (i in seq_along(fn)) {
    write.csv(chunks[[i]], fn[i], ...)
  }
  cat("files written as:\n  ",
      paste(fn, collapse = "\n  "), sep = "")
  invisible(fn)
}


这里有一个相对简单的解决方案:

library(nycflights13)
library(readr) # for faster write_csv

# Example: write the `flights` data set to disk as a series of CSV files,
# each holding at most `elements_per_chunk` rows.
df <- flights

# Maximum number of rows in each chunk
elements_per_chunk <- 100000

# List of row indices for each chunk. seq_len() (unlike 1:nrow(df)) yields an
# empty sequence for an empty data frame, so no bogus chunk is produced.
row_ids <- seq_len(nrow(df))
l <- split(row_ids, ceiling(row_ids / elements_per_chunk))

# Write one CSV file per chunk
for (i in seq_along(l)) {
  print(i) # totally optional, gives some feedback on progress
  write_csv(df[l[[i]], ], file = paste0("flights_chunk", i, ".csv"))
}
library(nycflights13)
library(readr) # 用于更快的 write_csv

这里有一个相对简单的解决方案:

library(nycflights13)
library(readr) # for faster write_csv

# Example: write the `flights` data set to disk as a series of CSV files,
# each holding at most `elements_per_chunk` rows.
df <- flights

# Maximum number of rows in each chunk
elements_per_chunk <- 100000

# List of row indices for each chunk. seq_len() (unlike 1:nrow(df)) yields an
# empty sequence for an empty data frame, so no bogus chunk is produced.
row_ids <- seq_len(nrow(df))
l <- split(row_ids, ceiling(row_ids / elements_per_chunk))

# Write one CSV file per chunk
for (i in seq_along(l)) {
  print(i) # totally optional, gives some feedback on progress
  write_csv(df[l[[i]], ], file = paste0("flights_chunk", i, ".csv"))
}
library(nycflights13)
library(readr) # 用于更快的 write_csv
df