Python Py2neo脚本在成功提交事务后约90秒停止

Python Py2neo脚本在成功提交事务后约90秒停止,python,neo4j,transactions,py2neo,Python,Neo4j,Transactions,Py2neo,在中更改代码以构建cypher事务而不是在Py2neo中创建节点和关系后,它似乎可以成功地工作,但事务在大约90秒后停止,并且没有错误消息。脚本继续运行,但迭代停止,不再进行任何事务,查看日志,似乎所有与数据库的交互都停止了。有人能帮我理解为什么会这样吗?以下代码正在Jupyter中执行 import pandas as pd import csv import math import allel import zarr from py2neo import Graph, Node, Relat

在中更改代码以构建cypher事务而不是在Py2neo中创建节点和关系后,它似乎可以成功地工作,但事务在大约90秒后停止,并且没有错误消息。脚本继续运行,但迭代停止,不再进行任何事务,查看日志,似乎所有与数据库的交互都停止了。有人能帮我理解为什么会这样吗?以下代码正在Jupyter中执行

import pandas as pd
import csv
import math
import allel
import zarr
from py2neo import Graph, Node, Relationship, NodeMatcher

zarr_path = '/media/user/Seagate Backup Plus Drive/uk_twin_cohort/exome/chroms.zarr'
callset = zarr.open_group(zarr_path, mode='r')

graph = Graph(user="neo4j", password="password")

chrom_list = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,'X']
matcher = NodeMatcher(graph)

for chrom in chrom_list:
    chrom_label = "Chromosome_" + str(chrom)
    samples = callset[chrom]['samples']
    variants = allel.VariantChunkedTable(callset[chrom]['variants'], names=['AC','AF_AFR', 'AF_AMR', 'AF_ASN', 'AF_EUR', 'AF_MAX', 'CGT', 'CLR', 'CSQ', 'DP', 'DP4', 'ESP_MAF', 'FILTER_LowQual', 'FILTER_MinHWE', 'FILTER_MinVQSLOD', 'FILTER_PASS', 'HWE', 'ICF', 'ID', 'IS', 'PC2', 'PCHI2', 'POS', 'PR', 'QCHI2', 'QUAL', 'REF', 'ALT', 'INDEL', 'SHAPEIT', 'SNP_ID', 'TYPE', 'UGT', 'VQSLOD', 'dbSNPmismatch', 'is_snp', 'numalt'], index='POS')
    pos = variants['POS'][:]
    pos = pos.tolist()
    ref = variants['REF'][:]
    alt = variants['ALT'][:]
    dpz = callset[chrom]['calldata/DP']
    dp = dpz[:, 0]
    psz = callset[chrom]['calldata/PS']
    plz = callset[chrom]['calldata/PL']
    gpz = callset[chrom]['calldata/GP']
    calldata = callset[chrom]['calldata']
    gt = allel.GenotypeDaskArray(calldata['GT'])
    hap = gt.to_haplotypes()
    hap = gt.to_haplotypes()
    hap1 = hap[:, ::2]
    hap2 = hap[:, 1::2]
    list_h1 = hap1[:, 0].compute()
    list_h1 = list_h1.tolist()
    list_h2 = hap2[:, 0].compute()
    for i in range(len(samples)):
        subject = samples[i]
        dp = dpz[:, i]
        ps = psz[:, i]
        pl = plz[:, i]
        gp = gpz[:, i]
        list_h1 = hap1[:, i].compute()
        list_h2 = hap2[:, i].compute()
        bp1 = []
        bp2 = []
        hpt = []
        n1 = []
        n2 = []
        g = Graph()
        print(subject)
        print("Subject " + str(i) + " of " + str(len(samples)))
        s = matcher.match("Subject", subject_id= subject).first()
        print(s)
        if s is None:
            continue
        j = 0
        nodes = []
        for j in range(len(pos)):
            h1 = int(list_h1[j])
            h2 = int(list_h2[j])
            k = int(pos[j])
            l = str(ref[j])
            m = str(alt[j][h1-1])
            o = str(alt[j][h2-1])
            if h1 == 0 and h2 == 0:
                a = matcher.match(chrom_label, pos=k, bp=l).first()
                nodes.append(a)
                nodes.append(a)

            elif h1 == 0 and h2 > 0:
                a = matcher.match(chrom_label, pos=k, bp=l).first()
                nodes.append(a)
                a = matcher.match(chrom_label, pos=k, bp=o).first()
                nodes.append(a)

            elif h1 > 0 and h2 == 0:
                a = matcher.match(chrom_label, pos=k, bp=m).first()
                nodes.append(a)
                a = matcher.match(chrom_label, pos=k, bp=l).first()
                nodes.append(a)

            elif h1 == h2 and h1 > 0:
                a = matcher.match(chrom_label, pos=k, bp=m).first()
                nodes.append(a)
                nodes.append(a)

            else:
                a = matcher.match(chrom_label, pos=k, bp=m).first()
                nodes.append(a)
                a = matcher.match(chrom_label, pos=k, bp=o).first()
                nodes.append(a)
            if j % 10000 == 0:
                print(str(j) + " rows complete.")
        print(subject + " matching complete.")
        print(len(nodes))
        j=0
        tx = g.begin()
        for j in range(len(pos)):
            read_depth = int(dp[j])
            ps1 = int(ps[j])
            PL0 = int(pl[j][0])
            PL1 = int(pl[j][1])
            PL2 = int(pl[j][2])
            genotype = str(h1) + '|' + str(h2) 
            GP0 = float(gp[j][0])
            GP1 = float(gp[j][1])
            GP2 = float(gp[j][2])
            h1 = int(list_h1[j])
            h2 = int(list_h2[j])
            k = int(pos[j])
            l = str(ref[j])
            m = str(alt[j][h1-1])
            o = str(alt[j][h2-1])

            if h1 == 0 and h2 == 0:
                x = (2*j)
                a = nodes[x]
                tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HOMOZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
            elif h1 == 0 and h2 > 0:
                x = (2*j)
                a = nodes[x]
                tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
                y = (2*j)+1
                b = nodes[y]
                tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {B} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "B":b.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
            elif h1 > 0 and h2 == 0:
                x = (2*j)
                a = nodes[j]
                tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
                y = (2*j)+1
                b = nodes[y]
                tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {B} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "B":b.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
            elif h1 == h2 and h1 > 0:
                x = (2*j)
                a = nodes[j]
                tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HOMOZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
            else:
                x = (2*j)
                a = nodes[j]
                tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {A} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "A":a.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
                y = (2*j)+1
                b = nodes[y]
                tx.run("MATCH (s) WHERE id(s) = {S} MATCH (a) WHERE id(a) = {B} MERGE (s)-[r:HETEROZYGOUS {HTA:{H1}, HTB:{H2}, GT:{GT}, dp:{DP}, phase_set:{PS1}, PL0:{PL0}, PL1:{PL1}, PL2:{PL2}, GP0:{GP0}, GP1:{GP1}, GP2:{GP2}}]->(a)", {"S":s.identity, "B":b.identity, "H1":h1, "H2":h2, "GT":genotype, "DP":read_depth, "PS1":ps1, "PL0":PL0, "PL1":PL1, "PL2":PL2, "GP0":GP0, "GP1":GP1, "GP2":GP2 })
            if j % 1000 == 0:
                print(str(j) + " rows added to database.")
                tx.commit()
                tx = g.begin()

    print(chrom_label + " completed.")

只有当
j%1000==0
测试成功时,才能调用
tx.commit()。您需要确保在退出
j
for循环后,如有必要调用
tx.commit()
,否则将丢弃最后一批操作


顺便说一下,您当前正在通过循环第一次调用
tx.commit()
(因为
0%1000==0
)。您可能希望避免这种情况。

如果第一次j迭代自己提交,会有不同吗?谢谢,我试试看会发生什么。我仍然不确定它是否解释了脚本停止的方式。我开始认为这可能是一个棘手的问题。第一次提交是低效的,但不应该引起你的主要问题。如果(j+1)%1000==0,您可以将测试更改为
,以避免它。