Python Py2neo脚本在成功提交事务后约90秒停止
在中更改代码以构建cypher事务而不是在Py2neo中创建节点和关系后,它似乎可以成功地工作,但事务在大约90秒后停止,并且没有错误消息。脚本继续运行,但迭代停止,不再进行任何事务,查看日志,似乎所有与数据库的交互都停止了。有人能帮我理解为什么会这样吗?以下代码正在Jupyter中执行Python Py2neo脚本在成功提交事务后约90秒停止,python,neo4j,transactions,py2neo,Python,Neo4j,Transactions,Py2neo,在中更改代码以构建cypher事务而不是在Py2neo中创建节点和关系后,它似乎可以成功地工作,但事务在大约90秒后停止,并且没有错误消息。脚本继续运行,但迭代停止,不再进行任何事务,查看日志,似乎所有与数据库的交互都停止了。有人能帮我理解为什么会这样吗?以下代码正在Jupyter中执行 import pandas as pd import csv import math import allel import zarr from py2neo import Graph, Node, Relat
import pandas as pd
import csv
import math
import allel
import zarr
from py2neo import Graph, Node, Relationship, NodeMatcher
zarr_path = '/media/user/Seagate Backup Plus Drive/uk_twin_cohort/exome/chroms.zarr'
callset = zarr.open_group(zarr_path, mode='r')
graph = Graph(user="neo4j", password="password")
chrom_list = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,'X']
matcher = NodeMatcher(graph)
for chrom in chrom_list:
chrom_label = "Chromosome_" + str(chrom)
samples = callset[chrom]['samples']
variants = allel.VariantChunkedTable(callset[chrom]['variants'], names=['AC','AF_AFR', 'AF_AMR', 'AF_ASN', 'AF_EUR', 'AF_MAX', 'CGT', 'CLR', 'CSQ', 'DP', 'DP4', 'ESP_MAF', 'FILTER_LowQual', 'FILTER_MinHWE', 'FILTER_MinVQSLOD', 'FILTER_PASS', 'HWE', 'ICF', 'ID', 'IS', 'PC2', 'PCHI2', 'POS', 'PR', 'QCHI2', 'QUAL', 'REF', 'ALT', 'INDEL', 'SHAPEIT', 'SNP_ID', 'TYPE', 'UGT', 'VQSLOD', 'dbSNPmismatch', 'is_snp', 'numalt'], index='POS')
pos = variants['POS'][:]
pos = pos.tolist()
ref = variants['REF'][:]
alt = variants['ALT'][:]
dpz = callset[chrom]['calldata/DP']
dp = dpz[:, 0]
psz = callset[chrom]['calldata/PS']
plz = callset[chrom]['calldata/PL']
gpz = callset[chrom]['calldata/GP']
calldata = callset[chrom]['calldata']
gt = allel.GenotypeDaskArray(calldata['GT'])
hap = gt.to_haplotypes()
hap = gt.to_haplotypes()
hap1 = hap[:, ::2]
hap2 = hap[:, 1::2]
list_h1 = hap1[:, 0].compute()
list_h1 = list_h1.tolist()
list_h2 = hap2[:, 0].compute()
for i in range(len(samples)):
subject = samples[i]
dp = dpz[:, i]
ps = psz[:, i]
pl = plz[:, i]
gp = gpz[:, i]
list_h1 = hap1[:, i].compute()
list_h2 = hap2[:, i].compute()
bp1 = []
bp2 = []
hpt = []
n1 = []
n2 = []
g = Graph()
print(subject)
print("Subject " + str(i) + " of " + str(len(samples)))
s = matcher.match("Subject", subject_id= subject).first()
print(s)
if s is None:
continue
j = 0
nodes = []
for j in range(len(pos)):
h1 = int(list_h1[j])
h2 = int(list_h2[j])
k = int(pos[j])
l = str(ref[j])
m = str(alt[j][h1-1])
o = str(alt[j][h2-1])
if h1 == 0 and h2 == 0:
a = matcher.match(chrom_label, pos=k, bp=l).first()
nodes.append(a)
nodes.append(a)
elif h1 == 0 and h2 > 0:
a = matcher.match(chrom_label, pos=k, bp=l).first()
nodes.append(a)
a = matcher.match(chrom_label, pos=k, bp=o).first()
nodes.append(a)
elif h1 > 0 and h2 == 0:
a = matcher.match(chrom_label, pos=k, bp=m).first()
nodes.append(a)
a = matcher.match(chrom_label, pos=k, bp=l).first()
nodes.append(a)
elif h1 == h2 and h1 > 0:
a = matcher.match(chrom_label, pos=k, bp=m).first()
nodes.append(a)
nodes.append(a)
else:
a = matcher.match(chrom_label, pos=k, bp=m).first()
nodes.append(a)
a = matcher.match(chrom_label, pos=k, bp=o).first()
nodes.append(a)
if j % 10000 == 0:
print(str(j) + " rows complete.")
print(subject + " matching complete.")
print(len(nodes))
j=0
tx = g.begin()
for j in range(len(pos)):
read_depth = int(dp[j])
ps1 = int(ps[j])
PL0 = int(pl[j][0])
PL1 = int(pl[j][1])
PL2 = int(pl[j][2])
genotype = str(h1) + '|' + str(h2)
GP0 = float(gp[j][0])
GP1 = float(gp[j][1])
GP2 = float(gp[j][2])
h1 = int(list_h1[j])
h2 = int(list_h2[j])
k = int(pos[j])
l = str(ref[j])
m = str(alt[j][h1-1])
o = str(alt[j][h2-1])
if h1 == 0 and h2 == 0:
x = (2*j)
a = nodes[x]
tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HOMOZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
elif h1 == 0 and h2 > 0:
x = (2*j)
a = nodes[x]
tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
y = (2*j)+1
b = nodes[y]
tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {B} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "B":b.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
elif h1 > 0 and h2 == 0:
x = (2*j)
a = nodes[j]
tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
y = (2*j)+1
b = nodes[y]
tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {B} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "B":b.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
elif h1 == h2 and h1 > 0:
x = (2*j)
a = nodes[j]
tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HOMOZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
else:
x = (2*j)
a = nodes[j]
tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
y = (2*j)+1
b = nodes[y]
tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {B} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "B":b.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
if j % 1000 == 0:
print(str(j) + " rows added to database.")
tx.commit()
tx = g.begin()
print(chrom_label + " completed.")
只有当
j%1000==0
测试成功时,才能调用tx.commit()。您需要确保在退出j
for循环后,如有必要调用tx.commit()
,否则将丢弃最后一批操作
顺便说一下,您当前正在通过循环第一次调用tx.commit()
(因为0%1000==0
)。您可能希望避免这种情况。如果第一次j迭代自己提交,会有不同吗?谢谢,我试试看会发生什么。我仍然不确定它是否解释了脚本停止的方式。我开始认为这可能是一个棘手的问题。第一次提交是低效的,但不应该引起你的主要问题。如果(j+1)%1000==0,您可以将测试更改为,以避免它。