R在多个data.frames之间循环并为其赋值_R_Loops

R在多个data.frames之间循环并为其赋值

r loops

R在多个data.frames之间循环并为其赋值,r,loops,R,Loops,我正在使用R对cnvkit输出执行一些更改（出于我的目的）。问题是：一个样本一个样本地完成任务，脚本就像一个符咒一样工作，但当我把它放入for循环时，它就断了在Stack Overflow上尝试了很多答案，但都没有帮到我 # Clear workspace rm(list=(ls())) ref <- read.csv("/path/to/reference.cnn", header=T, sep="\t") path <- "/path/to/call_files/" file

我正在使用R对cnvkit的输出做一些修改（出于我自己的需要）。问题是：逐个样本处理时，脚本运行得非常顺利；但一旦把它放进for循环，就无法运行了。

在Stack Overflow上尝试了很多答案，但都没有帮到我

# Load the reference table and every per-sample call file.
# The former `rm(list = ls())` was dropped: wiping the caller's workspace is a
# hidden side effect and is not needed for the steps below.
ref <- read.csv("/path/to/reference.cnn", header = TRUE, sep = "\t")
path <- "/path/to/call_files/"

# list.files() expects a regular expression, not a shell glob, so the dots are
# escaped and the match is anchored at the end of the file name.
files <- list.files(path = path, pattern = "\\.final\\.call\\.cnr$", full.names = FALSE)

for (file in files) {
    # Object name = file name up to the first ".", with spaces removed.
    sample_name <- gsub(" ", "", sub("\\..*$", "", file), fixed = TRUE)
    assign(sample_name, read.csv(file.path(path, file), header = TRUE, sep = "\t"))
}


# mod_CNV: merge one call file with the reference table `ref`, recompute "cn"
# from the rounded log2 value, keep rows whose cn is not 2, and write the
# result as a tab-separated .bed file.
#
# x: the value passed by the caller (a file name from `files`).
#
# NOTE(review): as written the body never uses `x`; it reads the global
# variables `files`, `i` and `ref` instead.
mod_CNV = function(x) {

    # Merge both files by "start" position
    # NOTE(review): `files[i]` is an element of the character vector returned
    # by list.files(), i.e. a file name, not the data frame that was read for
    # it. The suffix ".files[i]" is a literal string, not the sample name.
    merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))

    # Round "log2" column
    # NOTE(review): `merged$log2.files[i]` parses as `(merged$log2.files)[i]`,
    # and the target column name "log2.D00893" is hard-coded to one sample.
    merged$log2.D00893 <- round(merged$log2.files[i], digits=1)

    # re-calculate "cn" based on log2 correction
    merged$cn <- round(2*(2^(merged$log2.files[i])))

    # Subset file with all "cn" values that are not 2
    alt.cn <- subset(merged, merged$cn !=2)

    # Create new data with columns of interest
    # (columns are selected by position, so this depends on the merged layout)
    alt.cns <- as.data.frame(alt.cn[, c(1:8,13)])

    # Re-order columns for better view
    alt.cns <- alt.cns[c(2,1,3,4,6,5,8,7,9)]

    # Calculate ratio between coverages
    # NOTE(review): both lines compute the same expression, so "depth.ratio"
    # and "depth.ratio.1" hold identical values.
    alt.cns$depth.ratio <- round(alt.cns$depth.files[i] / alt.cns$depth.ref, digits=2)
    alt.cns$depth.ratio.1 <- round(alt.cns$depth.files[i] / alt.cns$depth.ref, digits=2)

    # Label each row: cn below 2 is "DEL", everything else is "DUP"
    alt.cns$SV_type <- ifelse(alt.cns$cn < 2, "DEL", "DUP")

    # Convert "alt.cns" to .bed file
    full <- alt.cns[c(1,2,3,12,5,4,6,7,8,9,10)]
    names(full)[1] <- "#Chrom"
    names(full)[2] <- "Start"
    names(full)[3] <- "End"
    names(full)[4] <- "SV_type"
    names(full)[6] <- "gene"
    names(full)[7] <- "log2"

    # Save "alt.cns" as .bed file
    # NOTE(review): "files[i]" inside the string is literal text, so every
    # call writes to the same path.
    write.table(full, file="/path/to/output/files[i].bed", row.names=F, col.names=T, sep="\t")

    # Filter "alt.cns" file
    # NOTE(review): `&` binds tighter than `|`, so this keeps rows where
    # ratio < 0.70, OR (ratio > 1.40 AND weight > 0.3). Confirm whether the
    # weight condition was meant to apply to both branches.
    filtered <- subset(alt.cns, alt.cns$depth.ratio < 0.70 | alt.cns$depth.ratio > 1.40 & alt.cns$weight > 0.3)
    filtered <- filtered[c(1,2,3,12,5,4,6,7,8,9,10)]
    names(filtered)[1] <- "#Chrom"
    names(filtered)[2] <- "Start"
    names(filtered)[3] <- "End"
    names(filtered)[4] <- "SV_type"
    names(filtered)[6] <- "gene"
    names(filtered)[7] <- "log2"

    # Save file
    # NOTE(review): same path as the write above, so the filtered table
    # overwrites the full one.
    write.table(filtered, file="/path/to/output/files[i].bed", row.names=F, col.names=T, sep="\t")

}


# Call mod_CNV() once per call file. The loop index `i` lives in the global
# environment, which is where mod_CNV() above picks up `files[i]`.
for (i in seq_along(files)) mod_CNV(files[i])

#清除工作区
rm(list=(ls()))
欢迎来到StackOverflow
您已经声明了一个函数：
mod_CNV = function(x) {

    # Merge both files by "start" position
    merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))
    .
    .
    .
}

`i` 是 `for` 循环的循环变量，并不是 `mod_CNV` 的参数。如果希望在 `mod_CNV` 中使用它，则需要将其作为参数传入。
您传递给 `mod_CNV` 的是文件名。在 `mod_CNV` 内部，这个文件名叫做 `x`，但我在 `mod_CNV` 中没有看到任何使用 `x` 的地方。
以下是您应该如何声明函数和使用传入的文件名：
mod_CNV = function(filename) {

    # Merge both files by "start" position
    merged <- merge(filename, ref, by="start", suffixes=c(filename, ".ref"))
    .
    .
    .
    # replace all other occurrences of `files[i]` with `filename`
}

另外，我以前没有用过 `merge`，也不清楚您具体想做什么……不过我觉得用整个文件名作为后缀有些奇怪。但这也许正是您想要的。
无论如何，这些信息应该足以帮助您解决问题。
欢迎使用StackOverflow
您已经声明了一个函数：
mod_CNV = function(x) {

    # Merge both files by "start" position
    merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))
    .
    .
    .
}

`i` 是 `for` 循环的循环变量，并不是 `mod_CNV` 的参数。如果希望在 `mod_CNV` 中使用它，则需要将其作为参数传入。
您传递给 `mod_CNV` 的是文件名。在 `mod_CNV` 内部，这个文件名叫做 `x`，但我在 `mod_CNV` 中没有看到任何使用 `x` 的地方。
以下是您应该如何声明函数和使用传入的文件名：
mod_CNV = function(filename) {

    # Merge both files by "start" position
    merged <- merge(filename, ref, by="start", suffixes=c(filename, ".ref"))
    .
    .
    .
    # replace all other occurrences of `files[i]` with `filename`
}

另外，我以前没有用过 `merge`，也不清楚您具体想做什么……不过我觉得用整个文件名作为后缀有些奇怪。但这也许正是您想要的。
无论如何，这应该是足够的信息来解决您的问题。
对于遇到与我相同问题的人，以下是正确的代码：
# Locate the per-sample files and load each one into its own data frame.
path <- "/path/to/files/"

# list.files() expects a regular expression, not a shell glob, so the dots are
# escaped and the match is anchored at the end of the file name.
files <- list.files(path = path, pattern = "\\.file\\.ext$", full.names = FALSE)

for (file in files) {
    # Object name = file name up to the first ".", with spaces removed.
    sample_name <- gsub(" ", "", sub("\\..*$", "", file), fixed = TRUE)
    assign(sample_name, read.csv(file.path(path, file), header = TRUE, sep = "\t"))
}

# Reference table, plus its depth normalised by the mean depth.
s_ref <- read.csv("/read/ref/file", header = TRUE, sep = "\t")
s_ref[["depth.ref.norm"]] <- round(s_ref[["depth"]] / mean(s_ref[["depth"]]), digits = 2)

#' Write a BED-like table of copy-number alterations for one call file.
#'
#' Reads a tab-separated call file, merges it with the reference table on
#' "start", recomputes "cn" from the rounded log2 value, keeps the rows whose
#' cn is not 2, adds depth ratios and a DEL/AMP label, and writes the result to
#' "<output_dir>/<file name>.bed".
#'
#' @param file Name (or path) of the call file. It is read as given when it
#'   exists; otherwise it is looked up inside `input_dir`. Its base name is
#'   used for the column suffixes and for the output file name.
#' @param ref Reference data frame; defaults to the global `s_ref`. It must
#'   provide the "depth.ref.norm" column.
#' @param input_dir Directory searched when `file` does not exist as given;
#'   defaults to the global `path`.
#' @param output_dir Directory the .bed file is written to.
#' @return The path of the written file, invisibly.
mod_CNV <- function(file, ref = s_ref, input_dir = path, output_dir = "/path") {
    stopifnot(is.character(file), length(file) == 1L, !is.na(file))

    # list.files(full.names = FALSE) yields bare names, which previously could
    # only be read when the working directory happened to be the data directory.
    input_path <- if (file.exists(file)) file else file.path(input_dir, file)
    file_df <- read.csv(input_path, header = TRUE, sep = "\t")

    # Column names derived from the sample file name, built once.
    sample_id <- basename(file)
    depth_col <- sprintf("depth.%s", sample_id)
    depth_norm_col <- sprintf("depth.%s.norm", sample_id)
    log2_col <- sprintf("log2.%s", sample_id)

    # Normalize depth by its mean
    file_df[[depth_norm_col]] <- round(file_df[["depth"]] / mean(file_df[["depth"]]), digits = 2)

    # Merge both tables by "start" position
    # NOTE(review): "start" alone pairs bins from different chromosomes that
    # share a start coordinate; confirm whether chromosome should be a key too.
    merged <- merge(file_df, ref, by = "start",
                    suffixes = c(sprintf(".%s", sample_id), ".ref"), all = TRUE)

    # The selection below is positional, so fail early with a clear message.
    if (ncol(merged) < 18L) {
        stop("merged table has ", ncol(merged), " columns; at least 18 are required",
             call. = FALSE)
    }

    # Round "log2", then re-calculate "cn" from the rounded value
    merged[[log2_col]] <- round(merged[[log2_col]], digits = 1)
    merged[["cn"]] <- round(2 * (2^(merged[[log2_col]])))

    # Keep rows whose "cn" is not 2 (rows with a missing cn are dropped)
    alt_cn <- subset(merged, merged[["cn"]] != 2)

    # Columns of interest, then re-ordered for a better view
    alt_cns <- as.data.frame(alt_cn[, c(1:9, 14, 18)])
    alt_cns <- alt_cns[c(2, 1, 3, 4, 6, 5, 8, 7, 9, 10, 11)]

    # Ratios between sample and reference coverage
    alt_cns[["depth.ratio.norm"]] <- round(alt_cns[[depth_norm_col]] / alt_cns[["depth.ref.norm"]], digits = 2)
    alt_cns[["depth.ratio"]] <- round(alt_cns[[depth_col]] / alt_cns[["depth.ref"]], digits = 2)

    # cn below 2 is a deletion, anything else an amplification
    alt_cns[["SV_type"]] <- ifelse(alt_cns[["cn"]] < 2, "DEL", "AMP")

    # BED-style column order and headers
    full <- alt_cns[c(1, 2, 3, 14, 5, 4, 6, 7, 8, 9, 10, 11, 12, 13)]
    names(full)[c(1, 2, 3, 4, 6, 7)] <- c("#Chrom", "Start", "End", "SV_type", "gene", "log2")

    full[["weight"]] <- round(full[["weight"]], digits = 2)
    full <- full[order(full[["#Chrom"]]), ]

    # Save "full" as .bed file
    output_file <- file.path(output_dir, sprintf("%s.bed", sample_id))
    write.table(full, file = output_file, row.names = FALSE, col.names = TRUE,
                sep = "\t", dec = ",")

    invisible(output_file)
}
    # List the discovered call files, then run mod_CNV() on each of them in turn.
    print(files)
    for (file in files) mod_CNV(file)

对于遇到与我相同问题的人，以下是正确的代码：
# Locate the per-sample files and load each one into its own data frame.
path <- "/path/to/files/"

# list.files() expects a regular expression, not a shell glob, so the dots are
# escaped and the match is anchored at the end of the file name.
files <- list.files(path = path, pattern = "\\.file\\.ext$", full.names = FALSE)

for (file in files) {
    # Object name = file name up to the first ".", with spaces removed.
    sample_name <- gsub(" ", "", sub("\\..*$", "", file), fixed = TRUE)
    assign(sample_name, read.csv(file.path(path, file), header = TRUE, sep = "\t"))
}

# Reference table, plus its depth normalised by the mean depth.
s_ref <- read.csv("/read/ref/file", header = TRUE, sep = "\t")
s_ref[["depth.ref.norm"]] <- round(s_ref[["depth"]] / mean(s_ref[["depth"]]), digits = 2)

#' Write a BED-like table of copy-number alterations for one call file.
#'
#' Reads a tab-separated call file, merges it with the reference table on
#' "start", recomputes "cn" from the rounded log2 value, keeps the rows whose
#' cn is not 2, adds depth ratios and a DEL/AMP label, and writes the result to
#' "<output_dir>/<file name>.bed".
#'
#' @param file Name (or path) of the call file. It is read as given when it
#'   exists; otherwise it is looked up inside `input_dir`. Its base name is
#'   used for the column suffixes and for the output file name.
#' @param ref Reference data frame; defaults to the global `s_ref`. It must
#'   provide the "depth.ref.norm" column.
#' @param input_dir Directory searched when `file` does not exist as given;
#'   defaults to the global `path`.
#' @param output_dir Directory the .bed file is written to.
#' @return The path of the written file, invisibly.
mod_CNV <- function(file, ref = s_ref, input_dir = path, output_dir = "/path") {
    stopifnot(is.character(file), length(file) == 1L, !is.na(file))

    # list.files(full.names = FALSE) yields bare names, which previously could
    # only be read when the working directory happened to be the data directory.
    input_path <- if (file.exists(file)) file else file.path(input_dir, file)
    file_df <- read.csv(input_path, header = TRUE, sep = "\t")

    # Column names derived from the sample file name, built once.
    sample_id <- basename(file)
    depth_col <- sprintf("depth.%s", sample_id)
    depth_norm_col <- sprintf("depth.%s.norm", sample_id)
    log2_col <- sprintf("log2.%s", sample_id)

    # Normalize depth by its mean
    file_df[[depth_norm_col]] <- round(file_df[["depth"]] / mean(file_df[["depth"]]), digits = 2)

    # Merge both tables by "start" position
    # NOTE(review): "start" alone pairs bins from different chromosomes that
    # share a start coordinate; confirm whether chromosome should be a key too.
    merged <- merge(file_df, ref, by = "start",
                    suffixes = c(sprintf(".%s", sample_id), ".ref"), all = TRUE)

    # The selection below is positional, so fail early with a clear message.
    if (ncol(merged) < 18L) {
        stop("merged table has ", ncol(merged), " columns; at least 18 are required",
             call. = FALSE)
    }

    # Round "log2", then re-calculate "cn" from the rounded value
    merged[[log2_col]] <- round(merged[[log2_col]], digits = 1)
    merged[["cn"]] <- round(2 * (2^(merged[[log2_col]])))

    # Keep rows whose "cn" is not 2 (rows with a missing cn are dropped)
    alt_cn <- subset(merged, merged[["cn"]] != 2)

    # Columns of interest, then re-ordered for a better view
    alt_cns <- as.data.frame(alt_cn[, c(1:9, 14, 18)])
    alt_cns <- alt_cns[c(2, 1, 3, 4, 6, 5, 8, 7, 9, 10, 11)]

    # Ratios between sample and reference coverage
    alt_cns[["depth.ratio.norm"]] <- round(alt_cns[[depth_norm_col]] / alt_cns[["depth.ref.norm"]], digits = 2)
    alt_cns[["depth.ratio"]] <- round(alt_cns[[depth_col]] / alt_cns[["depth.ref"]], digits = 2)

    # cn below 2 is a deletion, anything else an amplification
    alt_cns[["SV_type"]] <- ifelse(alt_cns[["cn"]] < 2, "DEL", "AMP")

    # BED-style column order and headers
    full <- alt_cns[c(1, 2, 3, 14, 5, 4, 6, 7, 8, 9, 10, 11, 12, 13)]
    names(full)[c(1, 2, 3, 4, 6, 7)] <- c("#Chrom", "Start", "End", "SV_type", "gene", "log2")

    full[["weight"]] <- round(full[["weight"]], digits = 2)
    full <- full[order(full[["#Chrom"]]), ]

    # Save "full" as .bed file
    output_file <- file.path(output_dir, sprintf("%s.bed", sample_id))
    write.table(full, file = output_file, row.names = FALSE, col.names = TRUE,
                sep = "\t", dec = ",")

    invisible(output_file)
}
    # List the discovered call files, then run mod_CNV() on each of them in turn.
    print(files)
    for (file in files) mod_CNV(file)

感谢您的帮助，我已经对脚本进行了修改，但在“合并”步骤中仍然会遇到相同的错误……我要寻找另一种合并文件的方法！谢谢。我刚意识到我忘了后缀中的“.”。它应该是 `suffixes = c(paste0(".", filename), ".ref")`。不确定这是否有帮助。谢谢你的帮助，很抱歉之前没有回复。不幸的是，你的回答并不正确。不过我找到了解决办法，我会把正确的代码贴在下面！
谢谢你的帮助，我已经对我的脚本进行了修改，但是我仍然在“合并”步骤中遇到相同的错误……我要寻找另一种合并文件的方法！谢谢。我刚意识到我忘了后缀中的“.”。它应该是 `suffixes = c(paste0(".", filename), ".ref")`。不确定这是否有帮助。谢谢你的帮助，很抱歉之前没有回复。不幸的是，你的回答并不正确。不过我找到了解决办法，我会把正确的代码贴在下面！