How do I lock a process during parallel processing in Python?

Tags: python, pandas, pyspark, multiprocessing, python-multiprocessing

I am using parallel processing to run two jobs at the same time. Sometimes they run fine, but sometimes they fail in random order, and I don't know why. So, is there any way to lock a process while the first job is running, so that the second job starts only after the first one completes?

Jobs: ['Im_Xref_Prod_bridge', 'RECORDTYPE'] both have execution sequence 1, so they start in parallel.

First they create their views in parallel, and then they run their queries; this is where they fail in random order. Can you help me solve this problem? I am sharing my code snippet so you can understand the problem better.

Please let me know if you need anything else.
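To show what I mean by "locking", here is a minimal standalone sketch, assuming a multiprocessing.Lock is shared with the Pool workers through an initializer so that only one job at a time can run the locked section. The init_worker/run_job names are placeholders, not part of my pipeline:

import multiprocessing as mp

def init_worker(shared_lock):
    # store the lock where the worker processes can reach it
    global lock
    lock = shared_lock

def run_job(job_name):
    with lock:
        # only one worker at a time executes this block
        print("%s: running the serialized part" % job_name)
    return job_name

if __name__ == '__main__':
    shared_lock = mp.Lock()
    pool = mp.Pool(processes=2, initializer=init_worker, initargs=(shared_lock,))
    print(pool.map(run_job, ['Im_Xref_Prod_bridge', 'RECORDTYPE']))
    pool.close()
    pool.join()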

Code:

In parallel_Execution() I am calling two functions:
parallel_view_creation()
parallel_build_query()

Now I am sharing the parallel_view_creation() code:


Does this answer your question? @Siddhartha How can I use a lock in my code? (A sketch with the lock passed into the worker follows the code below.)
import datetime
import logging
from collections import OrderedDict
from functools import partial
from multiprocessing import Pool

import pandas as pd


def parallel_Execution():

    logging.info("parallel_Execution..................[started]")
    par_temp_loc = '/medaff/Temp/'

    df = pd.read_csv(par_temp_loc+'metadata_file_imedical.txt', delimiter='|')

    #get unique exec seq
    logging.info("Getting the unique Execution Sequence Number!")
    unique_exec_seq = df['Execution Sequence'].unique().tolist()
    unique_exec_seq.sort()
    num_unique_seq = len(unique_exec_seq)
    logging.info("Total Number of unique sequence Number : %2d" %(num_unique_seq))

    list_df = []
    df_main4 = pd.DataFrame()
    for exec_seq in unique_exec_seq:
        temp_df = df[df['Execution Sequence'] == exec_seq].copy()
        unique_master_job = temp_df['Master Job Name'].unique().tolist()
        print(unique_master_job)  #['Im_Xref_Prod_bridge', 'RECORDTYPE']
        logging.info("%s Master Job Started." % (unique_master_job))

        if(len(unique_master_job)>0):
            num_processes = len(unique_master_job)
            pool = Pool(processes=num_processes)
            #Parallel View creation process start...................
            result1 = pool.map(partial(parallel_view_creation, exec_seq, temp_df), unique_master_job)
            pool.close()
            pool.join()
            df_main = pd.DataFrame(result1)
            print("printing df_main")
            print(df_main)

            for m_job in df_main.master_job.unique():
                temp_df1 = df_main[df_main['master_job'] == m_job]
                status = temp_df1.status.unique()[0]
                if status == 0:
                    # drop jobs whose view creation failed before building queries
                    unique_master_job.remove(m_job)


            #Parallel build query process start, sized for the surviving jobs
            num_processes = max(len(unique_master_job), 1)
            pool = Pool(processes=num_processes)
            result2 = pool.map(partial(parallel_build_query, exec_seq, temp_df), unique_master_job)
            pool.close()
            pool.join()
            # guard: result2 may be empty if every job was dropped above,
            # so df_main2 must always be defined before the concat
            df_main2 = pd.DataFrame()
            if result2:
                df_main2 = pd.DataFrame(result2)
            df_main3 = pd.concat([df_main, df_main2])
            # DataFrame.append() is deprecated in pandas; use pd.concat()
            df_main4 = pd.concat([df_main4, df_main3])
        print(df_main4)

def parallel_view_creation(exec_seq, temp_df, master_job):
    # per-job status record; pool.map() collects it in the parent
    error_dict = OrderedDict()
    error_dict['status'] = 0
    error_dict['error_msg'] = ''
    error_dict['error_func'] = ''
    error_dict['start_time'] = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")
    row = None  # defined up front so the except block can reference it safely

    try:
        logging.info("parallel_view_creation.................[started]")

        df_data = temp_df[temp_df['Execution Sequence'] == exec_seq]
        df_data = df_data[df_data['Master Job Name'] == master_job]
        pipe_files = ['CDP']
        view_list = []

        # df_data is already filtered down to this master job
        metatemp_df_main = df_data
        print(metatemp_df_main)
        for index, row in metatemp_df_main.iterrows():
            if not pd.isna(row['Source File Name Lnd']):
                # tables listed in pipe_files land \001-delimited; the rest use '|'
                is_pipe = row['TableName'] in pipe_files
                delimiter = '\001' if is_pipe else '|'
                print(("VIEW:    " if is_pipe else "CDP_VIEW:    ") + row['Source File Name Lnd'])
                # sqlContext is assumed to be a module-level pyspark SQLContext
                df_read_file = (sqlContext.read.format('csv')
                                .option("delimiter", delimiter)
                                .options(header='true', quote='', inferSchema='true')
                                .load(row['Source File Name Lnd']))
                df_read_file.createOrReplaceTempView(row['landingdfname'])
                view_list.append(row['landingdfname'])
                logging.info("View created for the table %s in %s"
                             % (row['TableName'], row['Master Job Name']))
        logging.info(view_list)
        error_dict['status'] = 1
    except Exception as Creationofviews:
        print(Creationofviews)
        logging.error(Creationofviews)
        # row may still be None if the failure happened before the loop started
        if row is not None:
            logging.info("Creation of views failed for %s in %s"
                         % (row['TableName'], row['Master Job Name']))
        error_dict['status'] = 0
        error_dict['error_msg'] = str(Creationofviews)
        error_dict['error_func'] = 'parallel_view_creation'
    finally:
        error_dict['master_job'] = master_job
        error_dict['exec_seq'] = exec_seq
        error_dict['end_time'] = datetime.datetime.now().strftime("%m-%d-%Y %H:%M:%S")
        # note: returning from finally also swallows any re-raised exception;
        # the dict is what pool.map() collects from this worker
        return error_dict
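If the random failures come from both workers calling createOrReplaceTempView() on the shared sqlContext at the same time, one option is to serialize that section with a lock. Below is a minimal sketch, not the actual pipeline: a Manager().Lock() proxy is picklable, so it can be carried into the workers through functools.partial. The parallel_view_creation_locked name, the extra lock parameter, and the placeholder arguments are all assumptions for illustration:

import logging
from functools import partial
from multiprocessing import Manager, Pool

# hypothetical variant of parallel_view_creation taking a lock as its first argument
def parallel_view_creation_locked(lock, exec_seq, temp_df, master_job):
    with lock:
        # serialized section: in the real code this would wrap the
        # sqlContext.read(...) / createOrReplaceTempView(...) calls
        logging.info("%s: creating views while holding the lock" % master_job)
    return {'master_job': master_job, 'exec_seq': exec_seq, 'status': 1}

if __name__ == '__main__':
    manager = Manager()
    lock = manager.Lock()  # proxy object, safe to pass through partial()/map()
    pool = Pool(processes=2)
    result = pool.map(partial(parallel_view_creation_locked, lock, 1, None),
                      ['Im_Xref_Prod_bridge', 'RECORDTYPE'])
    pool.close()
    pool.join()
    print(result)

One caveat: even with the lock, temp views registered inside a forked worker live in that worker's copy of the Spark session, so they may not be visible to the other process or to the parent; that alone could explain the random failures.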