尝试使用自定义python脚本查找Cooccupacy

尝试使用自定义python脚本查找Cooccupacy,python,mysql-python,Python,Mysql Python,这个脚本是由一位前实验室成员创建的,他比我更适合Python脚本编写 我试图在整个人类h19基因组的外显子区域找到注释峰之间的协同性。然而,在尝试让它运行大约一个小时后,我正在寻求帮助 以下是脚本: #!/usr/bin/python import math import sys import re import csv import MySQLdb import itertools import argparse # format for execution: ./findCooccupa

这个脚本是由一位前实验室成员创建的,他比我更适合Python脚本编写

我试图在整个人类h19基因组的外显子区域找到注释峰之间的协同性。然而,在尝试让它运行大约一个小时后,我正在寻求帮助

以下是脚本:

#!/usr/bin/python

import math
import sys
import re
import csv
import MySQLdb
import itertools
import argparse

# format for execution: ./findCooccupancy.py <loci file> <comma separated list of marks to check> <window size> <outputfile>
# example: ./findCooccupancy.py AllGenes.txt PolII-ChIP,KAP1-ChIP,Hexim 150 output.txt
# format of loci file:
# chr2  12345678    12345900    GENEA   1   +
# chr4  987654321   98765000    GENEB   1   -
# etc...

locifile = sys.argv[1]
marks = sys.argv[2]
window = int(sys.argv[3])
outputfile = sys.argv[4]
loci = list(csv.reader(open(locifile, 'rb'),delimiter='\t'))
#loci = list(itertools.chain.from_iterable(loci))
db = MySQLdb.connect(host="localhost",user="snrnp",passwd="snrnp",db="snrnp")
cur = db.cursor()

cntdict = {}
for mark in marks.split(","):
cntdict[mark] = []
counter = 1
for locus in loci:
print "Working on line# " + str(counter)
counter += 1
if str(locus[5]) == "+":
    exon = locus[1]
else:
    exon = locus[2]
for mark in marks.split(","):
# this is incredibly dirty. sorry. I don't have time to do this better 

    if mark == 'PolII-ChIP':
        cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")")
        #print "select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")"
    else:
        cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and ((chr_start < " + str(exon) + " and chr_end > " + str(exon) + ") or (abs(chr_start - " + str(exon) + ") < " + str(window) + ") or (abs(chr_end - " + str(exon) + ") < " + str(window) + "))")
        #print "select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and ((chr_start < " + str(exon) + " and chr_end > " + str(exon) + ") or (abs(chr_start - " + str(exon) + ") < " + str(window) + ") or (abs(chr_end - " + str(exon) + ") < " + str(window) + "))"
    cnt = cur.fetchone()[0]
    if cnt > 0:
        cntdict[mark].append(",".join(locus))
convertedlist = []
for key in cntdict.keys():
convertedlist.append(cntdict[key]) 
intersectlist = set(convertedlist[0]).intersection(*convertedlist[1:])

for key in cntdict.keys():
print str(key) + " hits: " + str(len(cntdict[key]))
print "\nTotal Intersection Count: " + str(len(intersectlist)) 

with open(outputfile, 'w') as outputwriter:
for line in intersectlist:
    outputwriter.write(line + "\n")
这是我收到的最新错误消息:

Working on line# 1
Traceback (most recent call last):
File "./findCooccupancy.py", line 41, in <module>
cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "'     and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")")
File "/Library/Python/2.7/site-packages/MySQLdb/cursors.py", line 205, in execute
self.errorhandler(self, exc, value)
File "/Library/Python/2.7/site-packages/MySQLdb/connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
_mysql_exceptions.OperationalError: (1054, "Unknown column 'Start' in 'where clause'")

请格式化您的代码您当前的错误是sql问题,而不是Python…@Sir l33tname很抱歉格式化,我是新手。@AIG您能告诉我sql错误到底是什么吗?数据库中没有起始列吗?请格式化您的代码您当前的错误是sql问题,而不是Python…@Sir l33tname很抱歉格式化,我是新来的。@AIG您能告诉我sql错误到底是什么吗?数据库中没有起始列吗?
Working on line# 1
Traceback (most recent call last):
File "./findCooccupancy.py", line 41, in <module>
cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "'     and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")")
File "/Library/Python/2.7/site-packages/MySQLdb/cursors.py", line 205, in execute
self.errorhandler(self, exc, value)
File "/Library/Python/2.7/site-packages/MySQLdb/connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
_mysql_exceptions.OperationalError: (1054, "Unknown column 'Start' in 'where clause'")