R:通过导入多个文件构建大型数据框
标签:r, matlab, lambda
我使用 readMat 命令从 MATLAB 的 .mat 文件导入了大量数据。数据具有如下的特征布局:struct$ERP:200x256x16,struct$TGT:200x1,以及一些我会丢弃的其他无关内容。TGT 变量是一个分类目标,第三个维度是每个目标对应的试验索引。
在每个被试的文件夹中都有七个或更多这样的文件——我想建立一个高效的结构,使我能够快速取出单个试验(沿第三维切片),同时以符合 R 习惯的方式跟踪目标变量。
我可以用相对笨拙的方式,对某个被试的一个文件执行以下步骤:
# Load the first .mat file found below the working directory and flatten its
# ERP array into a data frame: one column per channel plus a target column.
library(R.matlab) # library() errors if the package is missing; require() only returns FALSE

subdirs <- list.dirs(".")
# Relative paths of every .mat file below the working directory.
filelist <- list.files(path = ".", full.names = FALSE, pattern = "^.*\\.mat$",
                       ignore.case = TRUE, recursive = TRUE,
                       include.dirs = FALSE)
stopifnot(length(filelist) > 0)

# The original snippet used `filedatas` without ever creating it.
filedatas <- lapply(filelist, readMat)

# `filedatas[1][1][[1]]` is simply the first element of the list.
sub1t1src <- as.data.frame(filedatas[[1]]$eeg)

erp1 <- sub1t1src[1, 1]$ERP
# NOTE(review): assumes ERP is trials x samples x channels (200 x 256 x 16 as
# described in the question); after aperm it is samples x channels x trials.
erp1 <- aperm(erp1, c(2, 3, 1)) # data is permuted differently than I would like
# For each channel (dim 2) stack all samples of all trials into one column.
erp1r <- apply(erp1, 2, rbind)
erp1rdf <- as.data.frame(erp1r)

# Fixed: the original read from `sub1t1df`, which does not exist until the
# last line of this snippet; the targets live in the same source object.
tgt1 <- sub1t1src[2, 1]$TGT
tgt1 <- as.factor(tgt1)
# Repeat each trial's target once per sample so it lines up with the rows of
# erp1rdf (sample index varies fastest within a trial).
tgt1r <- rep(tgt1, each = dim(erp1)[1])
sub1t1df <- cbind(erp1rdf, tgt1r)
考虑到 Roland 的建议,我改用了下面这种 for 循环的写法(我觉得这不是一个好方法)。由于对象太大,我把它们拆分成了单独的对象:
# For every sub-directory of the working directory: read all .mat files,
# stack their ERP arrays along the trial axis, concatenate their targets into
# one factor, and save both as "<dir>/<dir>unifieddata.Rdata".
library(R.matlab) # readMat()
library(abind)    # abind()

subdirs <- list.dirs(".")
# The loop starts at the second entry, as the original did (list.dirs() lists
# the top-level path itself first). seq_along()[-1] is empty when there are no
# sub-directories, whereas 2:length(subdirs) would count backwards (2, 1).
for (this_subd in seq_along(subdirs)[-1]) {
  subdir <- subdirs[this_subd]
  # Look for all mat-files in that directory. The dot is escaped so that only
  # a real ".mat" extension matches; full.names = TRUE prepends the directory.
  filelist <- list.files(path = subdir, full.names = TRUE,
                         pattern = "\\.mat$",
                         ignore.case = TRUE, recursive = TRUE,
                         include.dirs = FALSE)
  if (length(filelist) == 0) {
    next # nothing to unify in this directory
  }
  # Read each file once and keep only the two fields that are needed, so the
  # full MATLAB structs are not all held in memory at the same time.
  per_file <- lapply(filelist, function(x) {
    eeg <- readMat(con = x)$eeg
    list(
      # NOTE(review): assumes ERP is trials x samples x channels (200 x 256 x
      # 16 as described in the question). aperm() really permutes it into
      # samples x channels x trials; the original array(..., dim = ...) call
      # only relabelled the dimensions and scrambled samples with channels.
      erp = aperm(eeg[, , 1]$ERP, c(2, 3, 1)),
      tgt = as.vector(eeg[, , 1]$TGT)
    )
  })
  # Bind every file (including the first, which the original loop skipped)
  # along the trial axis in one step. No NA placeholder array is needed, so
  # nothing has to be stripped out afterwards and no values get recycled.
  erpdata <- abind(lapply(per_file, `[[`, "erp"), along = 3)
  # Convert to categorical variable.
  targets <- as.factor(unlist(lapply(per_file, `[[`, "tgt")))
  # basename() equals the original substring(first = 3) for "./name" and also
  # yields a valid file name for nested directories.
  save(erpdata, targets,
       file = file.path(subdir, paste0(basename(subdir), "unifieddata.Rdata")))
  # Cleanup to save memory.
  rm(erpdata, targets, per_file)
}
细分:如果你的结构是 16x256x200(全部为数值),你应该把它放进数组(array)而不是 data.frame。在我看来,这并不“糟糕”。大多数数据都是数值型的,但每个 16x256 的“切片”都与一个 TGT 分类因子配对。你应该在问题中补充说明你想在 R 中对这些数据做什么。
# For every sub-directory of the working directory: read all .mat files,
# stack their ERP arrays along the trial axis, concatenate their targets into
# one factor, and save both as "<dir>/<dir>unifieddata.Rdata".
library(R.matlab) # readMat()
library(abind)    # abind()

subdirs <- list.dirs(".")
# The loop starts at the second entry, as the original did (list.dirs() lists
# the top-level path itself first). seq_along()[-1] is empty when there are no
# sub-directories, whereas 2:length(subdirs) would count backwards (2, 1).
for (this_subd in seq_along(subdirs)[-1]) {
  subdir <- subdirs[this_subd]
  # Look for all mat-files in that directory. The dot is escaped so that only
  # a real ".mat" extension matches; full.names = TRUE prepends the directory.
  filelist <- list.files(path = subdir, full.names = TRUE,
                         pattern = "\\.mat$",
                         ignore.case = TRUE, recursive = TRUE,
                         include.dirs = FALSE)
  if (length(filelist) == 0) {
    next # nothing to unify in this directory
  }
  # Read each file once and keep only the two fields that are needed, so the
  # full MATLAB structs are not all held in memory at the same time.
  per_file <- lapply(filelist, function(x) {
    eeg <- readMat(con = x)$eeg
    list(
      # NOTE(review): assumes ERP is trials x samples x channels (200 x 256 x
      # 16 as described in the question). aperm() really permutes it into
      # samples x channels x trials; the original array(..., dim = ...) call
      # only relabelled the dimensions and scrambled samples with channels.
      erp = aperm(eeg[, , 1]$ERP, c(2, 3, 1)),
      tgt = as.vector(eeg[, , 1]$TGT)
    )
  })
  # Bind every file (including the first, which the original loop skipped)
  # along the trial axis in one step. No NA placeholder array is needed, so
  # nothing has to be stripped out afterwards and no values get recycled.
  erpdata <- abind(lapply(per_file, `[[`, "erp"), along = 3)
  # Convert to categorical variable.
  targets <- as.factor(unlist(lapply(per_file, `[[`, "tgt")))
  # basename() equals the original substring(first = 3) for "./name" and also
  # yields a valid file name for nested directories.
  save(erpdata, targets,
       file = file.path(subdir, paste0(basename(subdir), "unifieddata.Rdata")))
  # Cleanup to save memory.
  rm(erpdata, targets, per_file)
}