R在多个data.frames之间循环并为其赋值
我正在使用R对cnvkit的输出做一些修改(出于我自己的需要)。问题是:逐个样本地执行时,脚本运行得非常顺利,但当我把它放入for循环后就出错了。我尝试了Stack Overflow上的许多答案,但都没有解决我的问题。
标签:r, loops
# Start from an empty workspace.
rm(list = ls())

# Reference coverage table (tab-separated, with a header row).
ref <- read.delim("/path/to/reference.cnn")

# Directory holding the per-sample call files, and the file names found in it.
path <- "/path/to/call_files/"
files <- list.files(path = path, pattern = "*.final.call.cnr", full.names = FALSE)

# Load every call file into a variable named after the text that precedes the
# first "." of its file name (with any spaces removed).
for (file in files) {
  perpos <- which(strsplit(file, "")[[1]] == ".")
  assign(
    gsub(" ", "", substr(file, 1, perpos - 1)),
    read.delim(paste0(path, file))
  )
}
# Per-sample CNV post-processing routine, as posted in the question.
# Declares a parameter `x` but never reads it; the body refers to `files[i]`
# instead, so both `files` and `i` are looked up outside the function.
# Also reads the global `ref` and writes a tab-separated table to disk.
mod_CNV = function(x) {
# Merge both files by "start" position
# NOTE(review): `files` is the character vector returned by list.files(), so
# files[i] is a file name, not one of the data frames created with assign().
# The suffix ".files[i]" is a literal string; no sample name is substituted.
merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))
# Round "log2" column
# NOTE(review): `merged$log2.files[i]` parses as (merged$log2.files)[i], i.e.
# element i of a column named "log2.files"; the result is stored under the
# hard-coded name "log2.D00893".
merged$log2.D00893 <- round(merged$log2.files[i], digits=1)
# re-calculate "cn" based on log2 correction
merged$cn <- round(2*(2^(merged$log2.files[i])))
# Subset file with all "cn" values that are not 2
alt.cn <- subset(merged, merged$cn !=2)
# Create new data with columns of interest
# Columns are picked by position - assumes a fixed column layout after the
# merge; TODO confirm against the real input files.
alt.cns <- as.data.frame(alt.cn[, c(1:8,13)])
# Re-order columns for better view
alt.cns <- alt.cns[c(2,1,3,4,6,5,8,7,9)]
# Calculate ratio between coverages
# depth.ratio.1 is computed with exactly the same expression as depth.ratio.
alt.cns$depth.ratio <- round(alt.cns$depth.files[i] / alt.cns$depth.ref, digits=2)
alt.cns$depth.ratio.1 <- round(alt.cns$depth.files[i] / alt.cns$depth.ref, digits=2)
## Label each row: "DEL" when cn is below 2, otherwise "DUP".
alt.cns$SV_type <- ifelse(alt.cns$cn < 2, "DEL", "DUP")
# Convert "alt.cns" to .bed file
# Position 12 is SV_type (9 re-ordered columns + the 3 columns added above);
# it is moved to the fourth position, and column 11 is left out.
full <- alt.cns[c(1,2,3,12,5,4,6,7,8,9,10)]
names(full)[1] <- "#Chrom"
names(full)[2] <- "Start"
names(full)[3] <- "End"
names(full)[4] <- "SV_type"
names(full)[6] <- "gene"
names(full)[7] <- "log2"
# Save "alt.cns" as .bed file
# The file name is the literal text "files[i].bed"; it does not vary by sample.
write.table(full, file="/path/to/output/files[i].bed", row.names=F, col.names=T, sep="\t")
# Filter "alt.cns" file
# NOTE(review): `&` binds tighter than `|` in R, so the weight > 0.3 condition
# applies only to the "> 1.40" branch - presumably unintended; verify.
filtered <- subset(alt.cns, alt.cns$depth.ratio < 0.70 | alt.cns$depth.ratio > 1.40 & alt.cns$weight > 0.3)
filtered <- filtered[c(1,2,3,12,5,4,6,7,8,9,10)]
names(filtered)[1] <- "#Chrom"
names(filtered)[2] <- "Start"
names(filtered)[3] <- "End"
names(filtered)[4] <- "SV_type"
names(filtered)[6] <- "gene"
names(filtered)[7] <- "log2"
# Save file
# Same path as the write.table() call above, so this output replaces it.
write.table(filtered, file="/path/to/output/files[i].bed", row.names=F, col.names=T, sep="\t")
}
# Call mod_CNV once per entry of `files`. The file name passed here is bound to
# the parameter `x`, which mod_CNV's body does not read; the body instead looks
# up `files[i]`, using the loop index `i` created by this loop.
for ( i in seq_along(files)) {
mod_CNV(files[i])
}
#清除工作区
rm(list=(ls()))
ref <- ...(代码与上文相同,此处从略)
欢迎来到Stack Overflow!
您已经声明了一个函数:
mod_CNV = function(x) {
# Merge both files by "start" position
merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))
.
.
.
}
`i`是for循环的局部变量。如果希望它在`mod_CNV`中可用,则需要将其作为参数传入。
您传递给`mod_CNV`的是文件名。在`mod_CNV`内部,这个文件名被称为`x`,但是我在`mod_CNV`中没有看到任何使用`x`的地方。
以下是您应该如何声明函数和使用传入的文件名:
mod_CNV = function(filename) {
# Merge both files by "start" position
merged <- merge(filename, ref, by="start", suffixes=c(filename, ".ref"))
.
.
.
# replace all other occurrences of `file[i]` with `filename`
}
另外,我以前没有使用过`merge`,也不清楚您到底想做什么……但我觉得用整个文件名作为后缀有点奇怪。不过这也许正是您想要的。
无论如何,这些信息应该足以帮助您解决问题。欢迎使用StackOverflow
您已经声明了一个函数:
mod_CNV = function(x) {
# Merge both files by "start" position
merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))
.
.
.
}
`i`是for循环的局部变量。如果希望它在`mod_CNV`中可用,则需要将其作为参数传入。
您传递给`mod_CNV`的是文件名。在`mod_CNV`内部,这个文件名被称为`x`,但是我在`mod_CNV`中没有看到任何使用`x`的地方。
以下是您应该如何声明函数和使用传入的文件名:
mod_CNV = function(filename) {
# Merge both files by "start" position
merged <- merge(filename, ref, by="start", suffixes=c(filename, ".ref"))
.
.
.
# replace all other occurrences of `file[i]` with `filename`
}
另外,我以前没有使用过`merge`,也不清楚您到底想做什么……但我觉得用整个文件名作为后缀有点奇怪。不过这也许正是您想要的。
无论如何,这些信息应该足以帮助您解决问题。

对于遇到与我相同问题的人,下面是正确的代码:
# Directory holding the per-sample input files, and the file names found in it.
path <- "/path/to/files/"
files <- list.files(path = path, pattern = "*.file.ext", full.names = FALSE)

# Load every input file into a variable named after the text that precedes the
# first "." of its file name (with any spaces removed).
for (file in files) {
  perpos <- which(strsplit(file, "")[[1]] == ".")
  assign(
    gsub(" ", "", substr(file, 1, perpos - 1)),
    read.delim(paste0(path, file))
  )
}

# Reference table, plus its depth expressed relative to the mean depth.
s_ref <- read.delim("/read/ref/file")
s_ref[["depth.ref.norm"]] <- round(
  s_ref[["depth"]] / mean(s_ref[["depth"]]),
  digits = 2
)
# Post-process one call file against the reference table `s_ref`.
#
# Reads the tab-separated call file, normalises its depth by the mean depth,
# merges it with the global `s_ref` on "start", recomputes "cn" from the
# rounded log2 column, keeps the rows whose "cn" is not 2, labels them as
# DEL/AMP and writes the result as a tab-separated table to "/path/<file>.bed".
#
# Args:
#   file: Name of the call file. It is used as given when it resolves from the
#     working directory; otherwise it is looked up inside the global `path`,
#     because list.files(..., full.names = FALSE) yields bare file names.
#     The name is also embedded in the merged column names and in the output
#     file name.
#
# Returns:
#   The value of write.table() (NULL, invisibly); called for its side effect.
mod_CNV <- function(file) {
  # Fall back to the input directory when the bare name is not reachable from
  # the current working directory.
  input_file <- if (file.exists(file)) file else paste0(path, file)
  file_df <- read.delim(input_file)

  # Column names that carry the sample (file) name.
  sample_suffix <- sprintf(".%s", file)
  depth_col <- sprintf("depth.%s", file)
  depth_norm_col <- sprintf("depth.%s.norm", file)
  log2_col <- sprintf("log2.%s", file)

  # Normalise depth by its mean.
  file_df[depth_norm_col] <- round(
    file_df[["depth"]] / mean(file_df[["depth"]]),
    digits = 2
  )

  # Merge sample and reference by "start"; columns present in both tables get
  # the sample suffix or ".ref". all = TRUE keeps unmatched rows from both.
  merged <- merge(
    file_df, s_ref,
    by = "start", suffixes = c(sample_suffix, ".ref"), all = TRUE
  )

  # Round the sample log2 column and re-calculate "cn" from the rounded value.
  merged[log2_col] <- round(merged[[log2_col]], digits = 1)
  merged["cn"] <- round(2 * (2^(merged[[log2_col]])))

  # Keep only rows whose "cn" is not 2 (rows with NA "cn" are dropped too).
  alt_cn <- subset(merged, merged[["cn"]] != 2)

  # Columns of interest, selected and re-ordered by position.
  # NOTE(review): assumes a fixed column layout after the merge - TODO confirm.
  alt_cns <- as.data.frame(alt_cn[, c(1:9, 14, 18)])
  alt_cns <- alt_cns[c(2, 1, 3, 4, 6, 5, 8, 7, 9, 10, 11)]

  # Sample/reference coverage ratios (normalised and raw).
  alt_cns["depth.ratio.norm"] <- round(
    alt_cns[[depth_norm_col]] / alt_cns[["depth.ref.norm"]],
    digits = 2
  )
  alt_cns["depth.ratio"] <- round(
    alt_cns[[depth_col]] / alt_cns[["depth.ref"]],
    digits = 2
  )

  # Label each row: "DEL" when cn is below 2, otherwise "AMP".
  alt_cns["SV_type"] <- ifelse(alt_cns$cn < 2, "DEL", "AMP")

  # Build the .bed layout: SV_type (column 14) moves to the fourth position.
  full <- alt_cns[c(1, 2, 3, 14, 5, 4, 6, 7, 8, 9, 10, 11, 12, 13)]
  names(full)[c(1, 2, 3, 4, 6, 7)] <-
    c("#Chrom", "Start", "End", "SV_type", "gene", "log2")
  full["weight"] <- round(full[["weight"]], digits = 2)
  full <- full[order(full[["#Chrom"]]), ]

  # Save "full" as .bed file (tab-separated, "," as decimal mark).
  output_file <- sprintf("/path/%s.bed", file)
  write.table(
    full,
    file = output_file, row.names = FALSE, col.names = TRUE,
    sep = "\t", dec = ","
  )
}
# Show the discovered file names, then run mod_CNV on each of them in turn.
print(files)
for (file in files) {
mod_CNV(file)
}
对于遇到与我相同问题的人,下面是正确的代码:
# Directory holding the per-sample input files, and the file names found in it.
path <- "/path/to/files/"
files <- list.files(path = path, pattern = "*.file.ext", full.names = FALSE)

# Load every input file into a variable named after the text that precedes the
# first "." of its file name (with any spaces removed).
for (file in files) {
  perpos <- which(strsplit(file, "")[[1]] == ".")
  assign(
    gsub(" ", "", substr(file, 1, perpos - 1)),
    read.delim(paste0(path, file))
  )
}

# Reference table, plus its depth expressed relative to the mean depth.
s_ref <- read.delim("/read/ref/file")
s_ref[["depth.ref.norm"]] <- round(
  s_ref[["depth"]] / mean(s_ref[["depth"]]),
  digits = 2
)
# Post-process one call file against the reference table `s_ref`.
#
# Reads the tab-separated call file, normalises its depth by the mean depth,
# merges it with the global `s_ref` on "start", recomputes "cn" from the
# rounded log2 column, keeps the rows whose "cn" is not 2, labels them as
# DEL/AMP and writes the result as a tab-separated table to "/path/<file>.bed".
#
# Args:
#   file: Name of the call file. It is used as given when it resolves from the
#     working directory; otherwise it is looked up inside the global `path`,
#     because list.files(..., full.names = FALSE) yields bare file names.
#     The name is also embedded in the merged column names and in the output
#     file name.
#
# Returns:
#   The value of write.table() (NULL, invisibly); called for its side effect.
mod_CNV <- function(file) {
  # Fall back to the input directory when the bare name is not reachable from
  # the current working directory.
  input_file <- if (file.exists(file)) file else paste0(path, file)
  file_df <- read.delim(input_file)

  # Column names that carry the sample (file) name.
  sample_suffix <- sprintf(".%s", file)
  depth_col <- sprintf("depth.%s", file)
  depth_norm_col <- sprintf("depth.%s.norm", file)
  log2_col <- sprintf("log2.%s", file)

  # Normalise depth by its mean.
  file_df[depth_norm_col] <- round(
    file_df[["depth"]] / mean(file_df[["depth"]]),
    digits = 2
  )

  # Merge sample and reference by "start"; columns present in both tables get
  # the sample suffix or ".ref". all = TRUE keeps unmatched rows from both.
  merged <- merge(
    file_df, s_ref,
    by = "start", suffixes = c(sample_suffix, ".ref"), all = TRUE
  )

  # Round the sample log2 column and re-calculate "cn" from the rounded value.
  merged[log2_col] <- round(merged[[log2_col]], digits = 1)
  merged["cn"] <- round(2 * (2^(merged[[log2_col]])))

  # Keep only rows whose "cn" is not 2 (rows with NA "cn" are dropped too).
  alt_cn <- subset(merged, merged[["cn"]] != 2)

  # Columns of interest, selected and re-ordered by position.
  # NOTE(review): assumes a fixed column layout after the merge - TODO confirm.
  alt_cns <- as.data.frame(alt_cn[, c(1:9, 14, 18)])
  alt_cns <- alt_cns[c(2, 1, 3, 4, 6, 5, 8, 7, 9, 10, 11)]

  # Sample/reference coverage ratios (normalised and raw).
  alt_cns["depth.ratio.norm"] <- round(
    alt_cns[[depth_norm_col]] / alt_cns[["depth.ref.norm"]],
    digits = 2
  )
  alt_cns["depth.ratio"] <- round(
    alt_cns[[depth_col]] / alt_cns[["depth.ref"]],
    digits = 2
  )

  # Label each row: "DEL" when cn is below 2, otherwise "AMP".
  alt_cns["SV_type"] <- ifelse(alt_cns$cn < 2, "DEL", "AMP")

  # Build the .bed layout: SV_type (column 14) moves to the fourth position.
  full <- alt_cns[c(1, 2, 3, 14, 5, 4, 6, 7, 8, 9, 10, 11, 12, 13)]
  names(full)[c(1, 2, 3, 4, 6, 7)] <-
    c("#Chrom", "Start", "End", "SV_type", "gene", "log2")
  full["weight"] <- round(full[["weight"]], digits = 2)
  full <- full[order(full[["#Chrom"]]), ]

  # Save "full" as .bed file (tab-separated, "," as decimal mark).
  output_file <- sprintf("/path/%s.bed", file)
  write.table(
    full,
    file = output_file, row.names = FALSE, col.names = TRUE,
    sep = "\t", dec = ","
  )
}
# Show the discovered file names, then run mod_CNV on each of them in turn.
print(files)
for (file in files) {
mod_CNV(file)
}
感谢您的帮助,我已经按照建议修改了脚本,但在“合并”(merge)步骤仍然遇到同样的错误……我会再找找其他合并文件的方法!谢谢。
我刚意识到我漏掉了后缀中的“.”。它应该是 suffixes=c(paste0(".", filename), ".ref")。不确定这是否有帮助。
谢谢你的帮助,很抱歉之前没有回复。遗憾的是,你的回答并不正确。不过我已经找到了解决办法,我会把正确的代码贴在下面!
感谢您的帮助,我已经按照建议修改了脚本,但在“合并”(merge)步骤仍然遇到同样的错误……我会再找找其他合并文件的方法!谢谢。
我刚意识到我漏掉了后缀中的“.”。它应该是 suffixes=c(paste0(".", filename), ".ref")。不确定这是否有帮助。
谢谢你的帮助,很抱歉之前没有回复。遗憾的是,你的回答并不正确。不过我已经找到了解决办法,我会把正确的代码贴在下面!