Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/74.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R在多个data.frames之间循环并为其赋值_R_Loops - Fatal编程技术网

R在多个data.frames之间循环并为其赋值

R在多个data.frames之间循环并为其赋值,r,loops,R,Loops,我正在使用R对cnvkit输出执行一些更改(出于我的目的)。问题是:一个样本一个样本地完成任务,脚本就像一个符咒一样工作,但当我把它放入for循环时,它就断了 在Stack Overflow上尝试了很多答案,但都没有帮到我 # Clear workspace rm(list=(ls())) ref <- read.csv("/path/to/reference.cnn", header=T, sep="\t") path <- "/path/to/call_files/" file

我正在使用R对cnvkit输出执行一些更改(出于我的目的)。问题是:一个样本一个样本地完成任务,脚本就像一个符咒一样工作,但当我把它放入for循环时,它就断了

在Stack Overflow上尝试了很多答案,但都没有帮到我

# Clear workspace
rm(list=(ls()))

ref <- read.csv("/path/to/reference.cnn", header=T, sep="\t")
path <- "/path/to/call_files/"
files = list.files(path = path, pattern = "*.final.call.cnr", full.names=FALSE)
for(file in files) {
    perpos <- which(strsplit(file, "")[[1]]==".")
    assign(
    gsub(" ","",substr(file, 1, perpos-1)), 
    read.csv(paste(path,file,sep=""), header=T, sep="\t"))

}


mod_CNV = function(x) {

    # Merge both files by "start" position
    merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))

    # Round "log2" column
    merged$log2.D00893 <- round(merged$log2.files[i], digits=1)

    # re-calculate "cn" based on log2 correction
    merged$cn <- round(2*(2^(merged$log2.files[i])))

    # Subset file with all "cn" values that are not 2
    alt.cn <- subset(merged, merged$cn !=2)

    # Create new data with columns of interest
    alt.cns <- as.data.frame(alt.cn[, c(1:8,13)])

    # Re-order columns for better view
    alt.cns <- alt.cns[c(2,1,3,4,6,5,8,7,9)]

    # Calculate ratio between coverages
    alt.cns$depth.ratio <- round(alt.cns$depth.files[i] / alt.cns$depth.ref, digits=2)
    alt.cns$depth.ratio.1 <- round(alt.cns$depth.files[i] / alt.cns$depth.ref, digits=2)

    ## Function to call for DUP or DEL.  
    alt.cns$SV_type <- ifelse(alt.cns$cn < 2, "DEL", "DUP")

    # Convert "alt.cns" to .bed file
    full <- alt.cns[c(1,2,3,12,5,4,6,7,8,9,10)]
    names(full)[1] <- "#Chrom"
    names(full)[2] <- "Start"
    names(full)[3] <- "End"
    names(full)[4] <- "SV_type"
    names(full)[6] <- "gene"
    names(full)[7] <- "log2"

    # Save "alt.cns" as .bed file
    write.table(full, file="/path/to/output/files[i].bed", row.names=F, col.names=T, sep="\t")

    # Filter "alt.cns" file
    filtered <- subset(alt.cns, alt.cns$depth.ratio < 0.70 | alt.cns$depth.ratio > 1.40 & alt.cns$weight > 0.3)
    filtered <- filtered[c(1,2,3,12,5,4,6,7,8,9,10)]
    names(filtered)[1] <- "#Chrom"
    names(filtered)[2] <- "Start"
    names(filtered)[3] <- "End"
    names(filtered)[4] <- "SV_type"
    names(filtered)[6] <- "gene"
    names(filtered)[7] <- "log2"

    #Save file
    write.table(filtered, file="/path/to/output/files[i].bed", row.names=F, col.names=T, sep="\t")

}


for ( i in seq_along(files)) {
        mod_CNV(files[i])
    }
#清除工作区
rm(列表=(ls()))

ref欢迎来到StackOverflow

您已经声明了一个函数:

mod_CNV = function(x) {

    # Merge both files by "start" position
    merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))
    .
    .
    .
}
i
for
循环的局部变量。如果希望它在mod_CNV中可用,则需要将其作为参数传入

您要传递给mod_CNV的是文件名。在
mod_CNV
的内部,这个文件名被称为
x
,但是我在
mod_CNV
中没有看到使用
x
的地方

以下是您应该如何声明函数和使用传入的文件名:

mod_CNV = function(filename) {

    # Merge both files by "start" position
    merged <- merge(filename, ref, by="start", suffixes=c(filename, ".ref"))
    .
    .
    .
    # replace all other occurrences of `file[i]` with `filename`
}
另外,我以前没有使用过
merge
,也不知道您到底想做什么。。。但我发现使用整个文件名作为后缀很奇怪。但这可能是你想要的


无论如何,这些信息应该足以帮助您解决问题。

欢迎使用StackOverflow

您已经声明了一个函数:

mod_CNV = function(x) {

    # Merge both files by "start" position
    merged <- merge(files[i], ref, by="start", suffixes=c(".files[i]", ".ref"))
    .
    .
    .
}
i
for
循环的局部变量。如果希望它在mod_CNV中可用,则需要将其作为参数传入

您要传递给mod_CNV的是文件名。在
mod_CNV
的内部,这个文件名被称为
x
,但是我在
mod_CNV
中没有看到使用
x
的地方

以下是您应该如何声明函数和使用传入的文件名:

mod_CNV = function(filename) {

    # Merge both files by "start" position
    merged <- merge(filename, ref, by="start", suffixes=c(filename, ".ref"))
    .
    .
    .
    # replace all other occurrences of `file[i]` with `filename`
}
另外,我以前没有使用过
merge
,也不知道您到底想做什么。。。但我发现使用整个文件名作为后缀很奇怪。但这可能是你想要的


无论如何,这应该是足够的信息来解决您的问题。

对于那些遇到与我相同问题的人,有正确的代码:

path <- "/path/to/files/"
files = list.files(path = path, pattern = "*.file.ext", full.names=FALSE)
for(file in files) {
    perpos <- which(strsplit(file, "")[[1]]==".")
    assign(
    gsub(" ","",substr(file, 1, perpos-1)), 
    read.csv(paste(path,file,sep=""), header=T, sep="\t"))

}

s_ref <- read.csv("/read/ref/file", header=T, sep="\t")
s_ref["depth.ref.norm"] <- round(s_ref["depth"]/mean(s_ref[["depth"]]), digits=2)

mod_CNV = function(file) {
    file_df <- read.csv(file, header=T, sep="\t")

    # Normalize $depth by mean
    file_df[sprintf("depth.%s.norm", file)] <- round(file_df[["depth"]]/mean(file_df[["depth"]]), digits=2)

    # Merge both files by "start" position
    merged <- merge(file_df, s_ref, by="start", suffixes=c(sprintf(".%s", file), ".ref"), all=TRUE)

    # Round "log2" column
    log2_col_name = sprintf("log2.%s", file)
    merged[log2_col_name] <- round(merged[[log2_col_name]], digits=1)

    # re-calculate "cn" based on log2 correction
    merged["cn"] <- round(2*(2^(merged[[log2_col_name]])))

    # Subset file with all "cn" values that are not 2
    alt_cn <- subset(merged, merged[["cn"]] != 2)

    # Create new data with columns of interest
    alt_cns <- as.data.frame(alt_cn[, c(1:9,14,18)])

    # Re-order columns for better view
    alt_cns <- alt_cns[c(2,1,3,4,6,5,8,7,9,10,11)]

    # Calculate ratio between coverages
    alt_cns["depth.ratio.norm"] <- round(alt_cns[[sprintf("depth.%s.norm", file)]] / alt_cns[["depth.ref.norm"]], digits=2)

    alt_cns["depth.ratio"] <- round(alt_cns[[sprintf("depth.%s", file)]] / alt_cns[["depth.ref"]], digits=2)

    ## Function to call for DUP or DEL.  
    alt_cns["SV_type"] <- ifelse(alt_cns$cn < 2, "DEL", "AMP")

    # Convert "alt.cns" to .bed file
    full <- alt_cns[c(1,2,3,14,5,4,6,7,8,9,10,11,12,13)]
    names(full)[1] <- "#Chrom"
    names(full)[2] <- "Start"
    names(full)[3] <- "End"
    names(full)[4] <- "SV_type"
    names(full)[6] <- "gene"
    names(full)[7] <- "log2"

    full["weight"] <- round(full[["weight"]], digits = 2)
    full <- full[order(full$"#Chrom"),]

    # Save "full" as .bed file
    output_file = sprintf("/path/%s.bed", file)
    write.table(full, file=output_file, row.names=F, col.names=T, sep="\t", dec=",")

}
    print(files)
    for (file in files) {
        mod_CNV(file)
}

path对于那些遇到与我相同问题的人,有正确的代码:

path <- "/path/to/files/"
files = list.files(path = path, pattern = "*.file.ext", full.names=FALSE)
for(file in files) {
    perpos <- which(strsplit(file, "")[[1]]==".")
    assign(
    gsub(" ","",substr(file, 1, perpos-1)), 
    read.csv(paste(path,file,sep=""), header=T, sep="\t"))

}

s_ref <- read.csv("/read/ref/file", header=T, sep="\t")
s_ref["depth.ref.norm"] <- round(s_ref["depth"]/mean(s_ref[["depth"]]), digits=2)

mod_CNV = function(file) {
    file_df <- read.csv(file, header=T, sep="\t")

    # Normalize $depth by mean
    file_df[sprintf("depth.%s.norm", file)] <- round(file_df[["depth"]]/mean(file_df[["depth"]]), digits=2)

    # Merge both files by "start" position
    merged <- merge(file_df, s_ref, by="start", suffixes=c(sprintf(".%s", file), ".ref"), all=TRUE)

    # Round "log2" column
    log2_col_name = sprintf("log2.%s", file)
    merged[log2_col_name] <- round(merged[[log2_col_name]], digits=1)

    # re-calculate "cn" based on log2 correction
    merged["cn"] <- round(2*(2^(merged[[log2_col_name]])))

    # Subset file with all "cn" values that are not 2
    alt_cn <- subset(merged, merged[["cn"]] != 2)

    # Create new data with columns of interest
    alt_cns <- as.data.frame(alt_cn[, c(1:9,14,18)])

    # Re-order columns for better view
    alt_cns <- alt_cns[c(2,1,3,4,6,5,8,7,9,10,11)]

    # Calculate ratio between coverages
    alt_cns["depth.ratio.norm"] <- round(alt_cns[[sprintf("depth.%s.norm", file)]] / alt_cns[["depth.ref.norm"]], digits=2)

    alt_cns["depth.ratio"] <- round(alt_cns[[sprintf("depth.%s", file)]] / alt_cns[["depth.ref"]], digits=2)

    ## Function to call for DUP or DEL.  
    alt_cns["SV_type"] <- ifelse(alt_cns$cn < 2, "DEL", "AMP")

    # Convert "alt.cns" to .bed file
    full <- alt_cns[c(1,2,3,14,5,4,6,7,8,9,10,11,12,13)]
    names(full)[1] <- "#Chrom"
    names(full)[2] <- "Start"
    names(full)[3] <- "End"
    names(full)[4] <- "SV_type"
    names(full)[6] <- "gene"
    names(full)[7] <- "log2"

    full["weight"] <- round(full[["weight"]], digits = 2)
    full <- full[order(full$"#Chrom"),]

    # Save "full" as .bed file
    output_file = sprintf("/path/%s.bed", file)
    write.table(full, file=output_file, row.names=F, col.names=T, sep="\t", dec=",")

}
    print(files)
    for (file in files) {
        mod_CNV(file)
}

path感谢您的帮助,我已经对脚本进行了修改,但在“合并”步骤中仍然会遇到相同的错误。。。我要寻找另一种合并文件的方法!谢谢。我刚意识到我忘了后缀中的“.”。它应该是
后缀=c(粘贴0(“.”,文件名),“.ref”)
。不确定这是否有帮助。谢谢你的帮助,很抱歉之前没有回答。不幸的是,你的回答不正确。所以,我找到了解决办法。我将张贴正确的代码吼叫!谢谢你的帮助,我已经对我的脚本进行了修改,但是我仍然在“合并”步骤中遇到相同的错误。。。我要寻找另一种合并文件的方法!谢谢。我刚意识到我忘了后缀中的“.”。它应该是
后缀=c(粘贴0(“.”,文件名),“.ref”)
。不确定这是否有帮助。谢谢你的帮助,很抱歉之前没有回答。不幸的是,你的回答不正确。所以,我找到了解决办法。我将张贴正确的代码吼叫!