Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/289.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python '位置感知'序列与字母注释的对齐_Python_R_Bioinformatics_Biopython_Bioconductor - Fatal编程技术网

Python '位置感知'序列与字母注释的对齐

Python ';位置感知&x27;序列与字母注释的对齐,python,r,bioinformatics,biopython,bioconductor,Python,R,Bioinformatics,Biopython,Bioconductor,我们有2个DNA序列(字符串): 预期输出:首先,我们需要对齐这两个字符串,然后按索引获取相关注释: ATGCAT AT-CAT 13-198 第一部分可以使用Biostrings包完成: 库(生物串) p可能的解决方案: dna_fun <- function(s, p, a) { s <- strsplit(s, "")[[1]] p <- strsplit(p, "")[[1]] a <- strsplit(a, "")[[1]] ls <-

我们有2个DNA序列(字符串):

预期输出:首先,我们需要对齐这两个字符串,然后按索引获取相关注释:

ATGCAT
AT-CAT
13-198
第一部分可以使用Biostrings包完成:

library(Biostrings)
p可能的解决方案:

#' Align a pattern to a subject that is exactly one character longer
#'
#' Builds every candidate obtained by doubling one character of `p`, keeps the
#' candidate that agrees with `s` at the most positions (the first one on
#' ties), and writes "-" at the positions where that candidate and `s` differ.
#'
#' @param s Subject sequence, a single string.
#' @param p Pattern sequence, a single non-empty string with one character
#'   fewer than `s`.
#' @param a Annotation string for `s`, one character per position of `s`.
#' @return An unnamed list of two strings: the gapped pattern, and the
#'   annotation with "-" at the same positions.
dna_fun <- function(s, p, a) {
  s <- strsplit(s, "")[[1]]
  p <- strsplit(p, "")[[1]]
  a <- strsplit(a, "")[[1]]
  ls <- length(s)
  lp <- length(p)

  # The search below only models a single extra position in `s`; fail with a
  # clear message instead of a cryptic recycling / non-conformable error.
  if (lp < 1L || ls != lp + 1L) {
    stop("`s` must be exactly one character longer than `p`", call. = FALSE)
  }

  # Repeat counts: each candidate doubles one position of `p`. The vector is
  # sized from `lp` (it used to be hard-coded to 5, which restricted the
  # function to 5-character patterns). Position 1 is listed twice so the
  # candidate columns, and therefore tie-breaking, match the original layout.
  r <- lapply(c(1L, seq_len(lp)), function(x) {
    v <- rep(1L, lp)
    v[x] <- 2L
    v
  })

  # One candidate per column; every candidate has the same length as `s`.
  mat <- vapply(r, function(times) rep(p, times = times), character(ls))

  # `s` is recycled down each column, giving a per-position match mask.
  tfm <- mat == s
  m <- which.max(colSums(tfm))

  mismatch <- !tfm[, m]

  p2 <- mat[, m]
  p2[mismatch] <- "-"
  a[mismatch] <- "-"

  p2 <- paste(p2, collapse = "")
  a <- paste(a, collapse = "")

  list(p2, a)
}
from Bio import Align

# Pattern, subject, and the per-character annotation of the subject.
p = "ATCAT"
s = "ATGCAT"
s_annot = "135198"

# Take the first alignment the aligner reports and split its text form on
# whitespace. The code expects exactly three rows: target, match line, query.
# NOTE(review): assumes str() of an alignment prints only those three rows --
# confirm against the installed Biopython version.
aligner = Align.PairwiseAligner()
alignment = str(aligner.align(p, s)[0]).split()
middle = alignment.pop(1)

# Keep the annotation character where the match line shows "|"; otherwise
# carry over the match-line symbol itself.
alignment.append(
    "".join([m if m != "|" else c for c, m in zip(s_annot, middle)])
)

print(*alignment, sep="\n")
你会得到:


如果您有相应的向量,可以将 Map 与 dna_fun 函数一起使用:

# Example inputs: subject sequences, patterns, and annotation strings.
s11 <- c("ATGCAT", "ATCGAT")
s22 <- rep("ATCAT", times = 2)
annot2 <- c("135198", "145892")

# Apply dna_fun element-wise over the three vectors, keeping a list result.
lm <- mapply(dna_fun, s11, s22, annot2, SIMPLIFY = FALSE)

# Stack the per-sequence results into one table; the list names go into the
# "dna" column.
data.table::rbindlist(lm, idcol = "dna")

数据:


s1根据要求,Biopython解决方案:

#' Align a pattern to a subject that is exactly one character longer
#'
#' Builds every candidate obtained by doubling one character of `p`, keeps the
#' candidate that agrees with `s` at the most positions (the first one on
#' ties), and writes "-" at the positions where that candidate and `s` differ.
#'
#' @param s Subject sequence, a single string.
#' @param p Pattern sequence, a single non-empty string with one character
#'   fewer than `s`.
#' @param a Annotation string for `s`, one character per position of `s`.
#' @return An unnamed list of two strings: the gapped pattern, and the
#'   annotation with "-" at the same positions.
dna_fun <- function(s, p, a) {
  s <- strsplit(s, "")[[1]]
  p <- strsplit(p, "")[[1]]
  a <- strsplit(a, "")[[1]]
  ls <- length(s)
  lp <- length(p)

  # The search below only models a single extra position in `s`; fail with a
  # clear message instead of a cryptic recycling / non-conformable error.
  if (lp < 1L || ls != lp + 1L) {
    stop("`s` must be exactly one character longer than `p`", call. = FALSE)
  }

  # Repeat counts: each candidate doubles one position of `p`. The vector is
  # sized from `lp` (it used to be hard-coded to 5, which restricted the
  # function to 5-character patterns). Position 1 is listed twice so the
  # candidate columns, and therefore tie-breaking, match the original layout.
  r <- lapply(c(1L, seq_len(lp)), function(x) {
    v <- rep(1L, lp)
    v[x] <- 2L
    v
  })

  # One candidate per column; every candidate has the same length as `s`.
  mat <- vapply(r, function(times) rep(p, times = times), character(ls))

  # `s` is recycled down each column, giving a per-position match mask.
  tfm <- mat == s
  m <- which.max(colSums(tfm))

  mismatch <- !tfm[, m]

  p2 <- mat[, m]
  p2[mismatch] <- "-"
  a[mismatch] <- "-"

  p2 <- paste(p2, collapse = "")
  a <- paste(a, collapse = "")

  list(p2, a)
}
from Bio import Align

# Pattern, subject, and the per-character annotation of the subject.
p = "ATCAT"
s = "ATGCAT"
s_annot = "135198"

# Take the first alignment the aligner reports and split its text form on
# whitespace. The code expects exactly three rows: target, match line, query.
# NOTE(review): assumes str() of an alignment prints only those three rows --
# confirm against the installed Biopython version.
aligner = Align.PairwiseAligner()
alignment = str(aligner.align(p, s)[0]).split()
middle = alignment.pop(1)

# Keep the annotation character where the match line shows "|"; otherwise
# carry over the match-line symbol itself.
alignment.append(
    "".join([m if m != "|" else c for c, m in zip(s_annot, middle)])
)

print(*alignment, sep="\n")
输出:

AT-CAT
ATGCAT
13-198
[[1]]
[1] "AT-CAT"

[[2]]
[1] "13-198"
# Example inputs: subject sequences, patterns, and annotation strings.
s11 <- c("ATGCAT", "ATCGAT")
s22 <- rep("ATCAT", times = 2)
annot2 <- c("135198", "145892")

# Apply dna_fun element-wise over the three vectors, keeping a list result.
lm <- mapply(dna_fun, s11, s22, annot2, SIMPLIFY = FALSE)

# Stack the per-sequence results into one table; the list names go into the
# "dna" column.
data.table::rbindlist(lm, idcol = "dna")
      dna     V1     V2
1: ATGCAT AT-CAT 13-198
2: ATCGAT ATC-AT 145-92
# Example data: two DNA sequences (s2 is one character shorter than s1) and an
# annotation string with one character per position of s1.
s1 <- "ATGCAT"
s2 <- "ATCAT"
annot <- "135198"
from Bio import Align

# Pattern, subject, and the per-character annotation of the subject.
p = "ATCAT"
s = "ATGCAT"
s_annot = "135198"

# Take the first alignment the aligner reports and split its text form on
# whitespace. The code expects exactly three rows: target, match line, query.
# NOTE(review): assumes str() of an alignment prints only those three rows --
# confirm against the installed Biopython version.
aligner = Align.PairwiseAligner()
alignment = str(aligner.align(p, s)[0]).split()
middle = alignment.pop(1)

# Keep the annotation character where the match line shows "|"; otherwise
# carry over the match-line symbol itself.
alignment.append(
    "".join([m if m != "|" else c for c, m in zip(s_annot, middle)])
)

print(*alignment, sep="\n")
AT-CAT
ATGCAT
13-198