在 R 中由导入的文件构建大型数据框

R从文件导入构建大型数据帧,r,matlab,lambda,R,Matlab,Lambda,我使用readMat命令从MATLAB.mat文件导入了大量数据。数据有这样的特征布局:struct$ERP:200x256x16,struct$TGT:200x1,以及我正在丢弃的一些其他无关内容。TGT变量是一个分类目标,第三个维度是每个目标的试验指标 在每个单独的主题文件夹中都有七个或更多这样的文件——我想建立一个有效的结构,使我能够快速地进行单独的试验(沿三维切片),同时使用正确的R样式跟踪目标变量 我可以用一个主题的文件以相对笨拙的方式执行以下步骤: require(R.matlab)

我使用 readMat 命令从 MATLAB 的 .mat 文件导入了大量数据。数据具有如下典型布局:
struct$ERP:200x256x16
struct$TGT:200x1
,另外还有一些我会丢弃的无关内容。
TGT
变量是分类目标,第三个维度是每个目标对应的试次(trial)索引

每个被试(subject)的文件夹中都有七个或更多这样的文件——我想构建一个高效的结构,使我能够快速取出单个试次(沿第三维切片),同时以符合 R 习惯的方式保留与之对应的目标变量

对于单个被试的文件,我可以用下面这种比较笨拙的方式完成上述步骤:

# Import the first recording found below the working directory and flatten it
# into a data frame with one row per (trial, sample) pair, one column per
# channel, and the trial's target label repeated on each of its rows.
library(R.matlab) # library() stops with an error if the package is missing
subdirs <- list.dirs(".")
filelist <- list.files(path = ".", full.names = FALSE,
                       pattern = "^.*\\.mat$",
                       ignore.case = TRUE, recursive = TRUE,
                       include.dirs = FALSE)

# Read every .mat file; the snippet previously used `filedatas` without ever
# creating it.
filedatas <- lapply(filelist, readMat)

# readMat() returns the MATLAB struct `eeg` as a list-array; [, , 1] turns it
# into a plain named list of fields (ERP, TGT, ...).
sub1t1src <- filedatas[[1]]$eeg[, , 1]

# assumes ERP is trials x samples x channels (200x256x16 per the description
# of the data) -- TODO confirm against the actual files
erp1 <- sub1t1src$ERP
n_samples <- dim(erp1)[2]
n_channels <- dim(erp1)[3]

# Reorder to samples x trials x channels, then collapse the first two
# dimensions so that rows run sample-fastest within each trial.
erp1r <- matrix(aperm(erp1, c(2, 1, 3)), ncol = n_channels)
erp1rdf <- as.data.frame(erp1r)

# The targets come from the imported struct (the old code read `sub1t1df`
# before it was defined). Repeat each trial's label once per sample so it
# lines up with the rows of erp1rdf.
tgt1 <- as.factor(sub1t1src$TGT)
tgt1r <- rep(tgt1, each = n_samples)

sub1t1df <- cbind(erp1rdf, tgt1r)

参考 Roland 的建议,我想到了下面这种使用 for 循环的做法,虽然写得不太好。由于对象太大,我把各个被试的对象分开保存

# For every sub-directory of the working directory: read all .mat files in
# it, stack their ERP arrays and target vectors, and save the combined
# objects as "<dir>/<dirname>unifieddata.Rdata".
# Requires the R.matlab and abind packages to be installed.
subdirs <- list.dirs(".")

# list.dirs() lists "." itself first, so element 1 is skipped.
# seq_along(subdirs)[-1] is empty when there are no sub-directories, whereas
# 2:length(subdirs) would count down (2, 1).
for (this_subd in seq_along(subdirs)[-1]) {
  subd_path <- subdirs[this_subd]

  # Look for all mat-files in that directory. The dot is escaped so only a
  # real ".mat" extension matches; full.names = TRUE prepends the directory.
  filelist <- list.files(path = subd_path, full.names = TRUE,
                         pattern = "\\.mat$",
                         ignore.case = TRUE, recursive = TRUE,
                         include.dirs = FALSE)

  # Nothing to combine here; previously this case failed on the first index.
  if (length(filelist) == 0) {
    next
  }

  # Read each file and keep only the two fields that are needed, so the rest
  # of each imported struct can be garbage-collected right away.
  filedatas <- lapply(filelist, function(x) {
    eeg <- R.matlab::readMat(con = x)$eeg[, , 1]
    # NOTE(review): array(..., dim =) only relabels the dimensions, it does
    # not reorder any values. If ERP arrives as 200x256x16 a true axis swap
    # would need aperm(eeg$ERP, c(1, 3, 2)) -- confirm against the data.
    list(erp = array(eeg$ERP, dim = c(200, 16, 256)), tgt = eeg$TGT)
  })

  # Stack all files along the first dimension in one step. Every file is
  # included (the old loop started at 2 and skipped the first one), and no
  # NA placeholder arrays are needed, so nothing has to be filtered out
  # afterwards.
  erpdata <- abind::abind(lapply(filedatas, `[[`, "erp"), along = 1)
  targets <- do.call(rbind, lapply(filedatas, `[[`, "tgt"))

  # Permute the data into  samples X channels X trials
  erpdata <- aperm(erpdata, c(3, 2, 1))
  targets <- as.factor(targets) # convert to categorical variable

  # basename() gives the same prefix as stripping "./" for top-level
  # directories and still yields a valid path for nested ones.
  save(erpdata, targets,
       file = file.path(subd_path,
                        paste0(basename(subd_path), "unifieddata.Rdata")))

  # cleanup to save memory
  rm(erpdata, targets, filedatas)
}

细分:如果你的结构是
16x256x200
(全部为数值),就应该把它放进数组而不是 data.frame。在我看来,这种做法并不“糟糕”。大部分数据确实是数值,但每个 16x256 的“切片”都与一个 TGT 分类因子配对。你应该在问题中补充说明你想在 R 中对这些数据做什么。
# For every sub-directory of the working directory: read all .mat files in
# it, stack their ERP arrays and target vectors, and save the combined
# objects as "<dir>/<dirname>unifieddata.Rdata".
# Requires the R.matlab and abind packages to be installed.
subdirs <- list.dirs(".")

# list.dirs() lists "." itself first, so element 1 is skipped.
# seq_along(subdirs)[-1] is empty when there are no sub-directories, whereas
# 2:length(subdirs) would count down (2, 1).
for (this_subd in seq_along(subdirs)[-1]) {
  subd_path <- subdirs[this_subd]

  # Look for all mat-files in that directory. The dot is escaped so only a
  # real ".mat" extension matches; full.names = TRUE prepends the directory.
  filelist <- list.files(path = subd_path, full.names = TRUE,
                         pattern = "\\.mat$",
                         ignore.case = TRUE, recursive = TRUE,
                         include.dirs = FALSE)

  # Nothing to combine here; previously this case failed on the first index.
  if (length(filelist) == 0) {
    next
  }

  # Read each file and keep only the two fields that are needed, so the rest
  # of each imported struct can be garbage-collected right away.
  filedatas <- lapply(filelist, function(x) {
    eeg <- R.matlab::readMat(con = x)$eeg[, , 1]
    # NOTE(review): array(..., dim =) only relabels the dimensions, it does
    # not reorder any values. If ERP arrives as 200x256x16 a true axis swap
    # would need aperm(eeg$ERP, c(1, 3, 2)) -- confirm against the data.
    list(erp = array(eeg$ERP, dim = c(200, 16, 256)), tgt = eeg$TGT)
  })

  # Stack all files along the first dimension in one step. Every file is
  # included (the old loop started at 2 and skipped the first one), and no
  # NA placeholder arrays are needed, so nothing has to be filtered out
  # afterwards.
  erpdata <- abind::abind(lapply(filedatas, `[[`, "erp"), along = 1)
  targets <- do.call(rbind, lapply(filedatas, `[[`, "tgt"))

  # Permute the data into  samples X channels X trials
  erpdata <- aperm(erpdata, c(3, 2, 1))
  targets <- as.factor(targets) # convert to categorical variable

  # basename() gives the same prefix as stripping "./" for top-level
  # directories and still yields a valid path for nested ones.
  save(erpdata, targets,
       file = file.path(subd_path,
                        paste0(basename(subd_path), "unifieddata.Rdata")))

  # cleanup to save memory
  rm(erpdata, targets, filedatas)
}