R从文件导入构建大型数据帧_R_Matlab_Lambda

R：从导入的文件构建大型数据框（data frame）

r matlab lambda

R从文件导入构建大型数据帧,r,matlab,lambda,R,Matlab,Lambda,我使用readMat命令从MATLAB.mat文件导入了大量数据。数据有这样的特征布局：struct$ERP:200x256x16，struct$TGT:200x1，以及我正在丢弃的一些其他无关内容。TGT变量是一个分类目标，第三个维度是每个目标的试验指标在每个单独的主题文件夹中都有七个或更多这样的文件——我想建立一个有效的结构，使我能够快速地进行单独的试验（沿三维切片），同时使用正确的R样式跟踪目标变量我可以用一个主题的文件以相对笨拙的方式执行以下步骤： require(R.matlab)

我使用readMat命令从MATLAB.mat文件导入了大量数据。数据有这样的特征布局：

struct$ERP:200x256x16

，

struct$TGT:200x1

，以及我正在丢弃的一些其他无关内容。

TGT

变量是一个分类目标，第三个维度是每个目标的试验指标

在每个受试者（subject）各自的文件夹中都有七个或更多这样的文件——我想建立一个高效的结构，使我能够快速取出单个试次（沿第三维切片），同时按照规范的 R 风格记录对应的目标变量。

对于单个受试者的一个文件，我可以用下面这种相对笨拙的方式完成这些步骤：

# Load the first .mat file found below the working directory and flatten its
# ERP array into a data frame with the matching target label on every row.
library(R.matlab) # readMat(); library() stops with an error if it is missing

subdirs <- list.dirs(".")
filelist <- list.files(path = ".", full.names = FALSE, pattern = "^.*\\.mat$",
                       ignore.case = TRUE, recursive = TRUE,
                       include.dirs = FALSE)

# Read every file that was found; the steps below index into this list, but
# the original fragment never created it.
filedatas <- lapply(filelist, readMat)

# The `eeg` struct of the first file, coerced to a data frame so its fields
# can be picked by row (row 1 is read as ERP, row 2 as TGT below).
sub1t1src <- as.data.frame(filedatas[[1]]$eeg)
erp1 <- sub1t1src[1, 1]$ERP
erp1 <- aperm(erp1, c(2, 3, 1)) # data is permuted differently than I would like
# Stack the slices taken along the second dimension into one column each.
erp1r <- apply(erp1, 2, rbind)
erp1rdf <- as.data.frame(erp1r)

# Read the targets from the source struct; the original referenced
# `sub1t1df` here, which does not exist until the cbind() below.
tgt1 <- sub1t1src[2, 1]$TGT
tgt1 <- as.factor(tgt1)
# Repeat every label 256 times so it lines up with the stacked ERP rows.
# NOTE(review): assumes 256 consecutive rows of erp1rdf belong to one trial.
tgt1r <- rep(tgt1, each = 256)

sub1t1df <- cbind(erp1rdf, tgt1r)

参考 Roland 的建议，我写了下面这个使用 for 循环的办法（我猜这并不是一个好办法）。由于对象太大，我把它们拆开，按文件夹分别保存。

# For every subfolder of the working directory: read all .mat files below it,
# stack their ERP arrays and targets, and save the result as
# "<folder>/<folder name>unifieddata.Rdata" (objects `erpdata` and `targets`).
library(R.matlab) # readMat()
library(abind)    # abind()

subdirs <- list.dirs(".")

# Element 1 is skipped, as in the original (it is treated as "." itself).
# seq_along()[-1] is empty when there are no subfolders, whereas
# 2:length(subdirs) would count down through 2, 1.
# NOTE(review): list.dirs() and list.files() both recurse, so files in nested
# folders are picked up for the parent and again for the nested folder.
for (this_subd in seq_along(subdirs)[-1]) {
  subd <- subdirs[this_subd]

  # Look for all mat-files below that directory. The dot is escaped so only a
  # literal ".mat" extension matches; full.names = TRUE prepends the folder.
  filelist <- list.files(path = subd, full.names = TRUE,
                         pattern = "\\.mat$",
                         ignore.case = TRUE, recursive = TRUE,
                         include.dirs = FALSE)
  if (length(filelist) == 0) {
    next # nothing to combine in this folder
  }

  # Read each file and keep only the two fields that are needed. Every file
  # is used; the original inner loop started at 2 and skipped the first one.
  per_file <- lapply(filelist, function(x) {
    eeg <- readMat(con = x)$eeg[, , 1]
    list(
      # NOTE(review): array() re-labels the dimensions without moving any
      # values. If ERP is not already stored as 200 x 16 x 256, aperm() is
      # what is needed here instead - confirm against the .mat files.
      erp = array(eeg$ERP, dim = c(200, 16, 256)),
      tgt = eeg$TGT
    )
  })

  # Bind once instead of growing from an NA-filled placeholder; stripping the
  # placeholder afterwards kept the old dimensions and recycled real values.
  erpdata <- abind(lapply(per_file, `[[`, "erp"), along = 1)
  targets <- do.call(rbind, lapply(per_file, `[[`, "tgt"))

  # Permute the data into  samples X channels X trials
  erpdata <- aperm(erpdata, c(3, 2, 1))
  targets <- as.factor(targets) # convert to categorical variable

  # basename() equals the old substring(..., first = 3) for top-level folders
  # and still gives a valid path for nested ones.
  save(erpdata, targets,
       file = file.path(subd, paste0(basename(subd), "unifieddata.Rdata")))

  # cleanup to save memory
  rm(erpdata, targets, per_file)
}

评论：如果你的结构是 16x256x200（全部为数值），你应该把它放进数组（array）而不是 data.frame；在我看来，这段代码并不“糟糕”。——回复：大多数数据都是数值型的，但每个 16x256 的“切片”都与一个 TGT 分类因子配对。——评论：你应该在问题中补充说明你想在 R 中对这些数据做什么。
# For every subfolder of the working directory: read all .mat files below it,
# stack their ERP arrays and targets, and save the result as
# "<folder>/<folder name>unifieddata.Rdata" (objects `erpdata` and `targets`).
library(R.matlab) # readMat()
library(abind)    # abind()

subdirs <- list.dirs(".")

# Element 1 is skipped, as in the original (it is treated as "." itself).
# seq_along()[-1] is empty when there are no subfolders, whereas
# 2:length(subdirs) would count down through 2, 1.
# NOTE(review): list.dirs() and list.files() both recurse, so files in nested
# folders are picked up for the parent and again for the nested folder.
for (this_subd in seq_along(subdirs)[-1]) {
  subd <- subdirs[this_subd]

  # Look for all mat-files below that directory. The dot is escaped so only a
  # literal ".mat" extension matches; full.names = TRUE prepends the folder.
  filelist <- list.files(path = subd, full.names = TRUE,
                         pattern = "\\.mat$",
                         ignore.case = TRUE, recursive = TRUE,
                         include.dirs = FALSE)
  if (length(filelist) == 0) {
    next # nothing to combine in this folder
  }

  # Read each file and keep only the two fields that are needed. Every file
  # is used; the original inner loop started at 2 and skipped the first one.
  per_file <- lapply(filelist, function(x) {
    eeg <- readMat(con = x)$eeg[, , 1]
    list(
      # NOTE(review): array() re-labels the dimensions without moving any
      # values. If ERP is not already stored as 200 x 16 x 256, aperm() is
      # what is needed here instead - confirm against the .mat files.
      erp = array(eeg$ERP, dim = c(200, 16, 256)),
      tgt = eeg$TGT
    )
  })

  # Bind once instead of growing from an NA-filled placeholder; stripping the
  # placeholder afterwards kept the old dimensions and recycled real values.
  erpdata <- abind(lapply(per_file, `[[`, "erp"), along = 1)
  targets <- do.call(rbind, lapply(per_file, `[[`, "tgt"))

  # Permute the data into  samples X channels X trials
  erpdata <- aperm(erpdata, c(3, 2, 1))
  targets <- as.factor(targets) # convert to categorical variable

  # basename() equals the old substring(..., first = 3) for top-level folders
  # and still gives a valid path for nested ones.
  save(erpdata, targets,
       file = file.path(subd, paste0(basename(subd), "unifieddata.Rdata")))

  # cleanup to save memory
  rm(erpdata, targets, per_file)
}