如何设置在R中输出csv文件的限制?
如果我的输出文件是16MB,并且我将限制设置为5MB,那么它应该生成4个文件:三个5MB的文件和一个1MB的文件。
或者,也可以以文件的最大行数作为条件。比如说,一个有16k行的文件,如果限制为每个文件最多5k行,则应生成4个文件:三个5k行的文件和一个1k行的文件。
#' Write a data frame to several CSV files, capped by size or by row count
#'
#' Splits `x` into consecutive row chunks and writes each chunk to
#' `<stem>_<i>.csv` with `write.csv()`.
#'
#' @param x A data frame to write.
#' @param nMB Maximum size of one chunk in megabytes. The size is estimated
#'   from `object.size(x)`, i.e. the in-memory size spread evenly over the
#'   rows; the resulting CSV files on disk may be larger or smaller.
#' @param nrow Maximum number of rows per file. Exactly one of `nMB` and
#'   `nrow` must be supplied.
#' @param stem File name prefix. Defaults to a random temp-file base name,
#'   so the files are created in the working directory.
#' @param ... Further arguments forwarded to `write.csv()`.
#' @return The vector of file names written, invisibly.
writeCSV_group <- function(x, nMB = NULL, nrow = NULL, stem = NULL, ...) {
  if (!is.null(nMB) && !is.null(nrow)) {
    stop("Only one of nMB or nrow can be specified.\n")
  }
  if (is.null(nMB) && is.null(nrow)) {
    stop("One of nMB or nrow must be specified.\n")
  }
  limit <- if (is.null(nMB)) nrow else nMB
  if (!is.numeric(limit) || length(limit) != 1L || is.na(limit) ||
      limit <= 0) {
    stop("The limit (nMB or nrow) must be a single positive number.\n")
  }

  # The `nrow` argument shadows base::nrow() wherever it is used as a value,
  # so the row-count function is always referenced explicitly.
  n_rows <- base::nrow(x)

  if (n_rows == 0L) {
    # Nothing to split: still emit one file so the header is written.
    chunks <- list(x)
  } else {
    if (is.null(nMB)) {
      rows_per_file <- floor(nrow)
    } else {
      mb_per_row <- as.numeric(utils::object.size(x)) / (1024^2) / n_rows
      rows_per_file <- floor(nMB / mb_per_row)
    }
    # Round down so a chunk never exceeds the limit, but always make progress
    # with at least one row per file, even if a single row is over the limit.
    rows_per_file <- max(1, rows_per_file)
    chunk_id <- ceiling(seq_len(n_rows) / rows_per_file)
    chunks <- split(x, chunk_id)
  }

  # basename() is separator-agnostic, unlike a "/"-only regex.
  # Only the first element of a user-supplied stem is used.
  stem <- if (is.null(stem)) basename(tempfile()) else stem[1]
  # File names are derived from the chunks actually produced, so names and
  # chunks can never get out of step.
  fn <- paste0(stem, "_", seq_along(chunks), ".csv")
  for (i in seq_along(chunks)) {
    utils::write.csv(chunks[[i]], fn[i], ...)
  }
  cat("files written as:\n ",
      paste(fn, collapse = "\n "), sep = "")
  invisible(fn)
}
类似下面这样的方法应该可以奏效:
#' Write a data frame to several CSV files, capped by size or by row count
#'
#' Splits `x` into consecutive row chunks and writes each chunk to
#' `<stem>_<i>.csv` with `write.csv()`.
#'
#' @param x A data frame to write.
#' @param nMB Maximum size of one chunk in megabytes. The size is estimated
#'   from `object.size(x)`, i.e. the in-memory size spread evenly over the
#'   rows; the resulting CSV files on disk may be larger or smaller.
#' @param nrow Maximum number of rows per file. Exactly one of `nMB` and
#'   `nrow` must be supplied.
#' @param stem File name prefix. Defaults to a random temp-file base name,
#'   so the files are created in the working directory.
#' @param ... Further arguments forwarded to `write.csv()`.
#' @return The vector of file names written, invisibly.
writeCSV_group <- function(x, nMB = NULL, nrow = NULL, stem = NULL, ...) {
  if (!is.null(nMB) && !is.null(nrow)) {
    stop("Only one of nMB or nrow can be specified.\n")
  }
  if (is.null(nMB) && is.null(nrow)) {
    stop("One of nMB or nrow must be specified.\n")
  }
  limit <- if (is.null(nMB)) nrow else nMB
  if (!is.numeric(limit) || length(limit) != 1L || is.na(limit) ||
      limit <= 0) {
    stop("The limit (nMB or nrow) must be a single positive number.\n")
  }

  # The `nrow` argument shadows base::nrow() wherever it is used as a value,
  # so the row-count function is always referenced explicitly.
  n_rows <- base::nrow(x)

  if (n_rows == 0L) {
    # Nothing to split: still emit one file so the header is written.
    chunks <- list(x)
  } else {
    if (is.null(nMB)) {
      rows_per_file <- floor(nrow)
    } else {
      mb_per_row <- as.numeric(utils::object.size(x)) / (1024^2) / n_rows
      rows_per_file <- floor(nMB / mb_per_row)
    }
    # Round down so a chunk never exceeds the limit, but always make progress
    # with at least one row per file, even if a single row is over the limit.
    rows_per_file <- max(1, rows_per_file)
    chunk_id <- ceiling(seq_len(n_rows) / rows_per_file)
    chunks <- split(x, chunk_id)
  }

  # basename() is separator-agnostic, unlike a "/"-only regex.
  # Only the first element of a user-supplied stem is used.
  stem <- if (is.null(stem)) basename(tempfile()) else stem[1]
  # File names are derived from the chunks actually produced, so names and
  # chunks can never get out of step.
  fn <- paste0(stem, "_", seq_along(chunks), ".csv")
  for (i in seq_along(chunks)) {
    utils::write.csv(chunks[[i]], fn[i], ...)
  }
  cat("files written as:\n ",
      paste(fn, collapse = "\n "), sep = "")
  invisible(fn)
}
这里有一个相对简单的解决方案:
# Split the nycflights13 `flights` table into fixed-size row chunks and write
# each chunk to its own CSV file in the working directory.
library(nycflights13)
library(readr) # for faster write_csv

df <- flights

# Maximum number of rows written to each chunk file.
elements_per_chunk <- 100000

# Row indices grouped by chunk number. seq_len() is used instead of
# 1:nrow(df) so a zero-row table yields no chunks rather than c(1, 0).
row_ids <- seq_len(nrow(df))
l <- split(row_ids, ceiling(row_ids / elements_per_chunk))

# Write chunk i to flights_chunk<i>.csv.
for (i in seq_along(l)) {
  print(i) # totally optional, gives some feedback on progress
  write_csv(df[l[[i]], ], file = paste0("flights_chunk", i, ".csv"))
}
这里有一个相对简单的解决方案:
# Split the nycflights13 `flights` table into fixed-size row chunks and write
# each chunk to its own CSV file in the working directory.
library(nycflights13)
library(readr) # for faster write_csv

df <- flights

# Maximum number of rows written to each chunk file.
elements_per_chunk <- 100000

# Row indices grouped by chunk number. seq_len() is used instead of
# 1:nrow(df) so a zero-row table yields no chunks rather than c(1, 0).
row_ids <- seq_len(nrow(df))
l <- split(row_ids, ceiling(row_ids / elements_per_chunk))

# Write chunk i to flights_chunk<i>.csv.
for (i in seq_along(l)) {
  print(i) # totally optional, gives some feedback on progress
  write_csv(df[l[[i]], ], file = paste0("flights_chunk", i, ".csv"))
}
库(nycflights13)
库(readr)#用于更快的写入_csv
df