Multithreading 为什么这个多线程脚本会冻结？_Multithreading_Python 3.x_Ipython_Numba

Multithreading 为什么这个多线程脚本会冻结？

multithreading python-3.x ipython

Multithreading 为什么这个多线程脚本会冻结？,multithreading,python-3.x,ipython,numba,Multithreading,Python 3.x,Ipython,Numba,我试图在这里修改最后一个示例：实现多线程groupby max函数到目前为止，我所做的工作如下。在IPython会话中，它开始打印“在主线程上”，但在这之后，当我尝试输入新命令时，控制台所做的一切都是打印输出“控制台已退出，值为：-1073741819，正在等待回答。” 如何使穿线正确工作我有numba。版本0.15.1 import numpy as np import pandas as pd from numba import void, double, jit,int64 im

我试图在这里修改最后一个示例：

实现多线程groupby max函数

到目前为止，我所做的工作如下。在IPython会话中，它开始打印“在主线程上”，但在这之后，当我尝试输入新命令时，控制台所做的一切都是打印输出“控制台已退出，值为：-1073741819，正在等待回答。”

如何使穿线正确工作

我有numba。版本0.15.1

import numpy as np
import pandas as pd
from numba import void, double, jit,int64

import threading
from ctypes import pythonapi, c_void_p

N = 1e1
m = 3
p = 3

x = np.random.randint( 0, m, N )
y = np.random.randint( 0, p, N )
z = np.random.randn(N)

key = [ 'x', 'y' ]

df = pd.DataFrame( {'x':x, 'y':y, 'z':z} )
df = df.sort( key )

grps = df.groupby( key )
print( df.head() )    

fld      = 'z'
comp_ids, _, ngroups = grps.grouper.group_info
data       = grps.obj[ fld ].values
length     = len( data )
numthreads = 2

def get_group_splits( grps, num_splits ):
    comp_ids, _, ngroups = grps.grouper.group_info
    length               = len( comp_ids )

    arrs              = np.array_split( comp_ids, num_splits )
    borders_comp_ids  = [a[0] for a in arrs[1:]]
    borders_indices   = np.searchsorted( comp_ids, borders_comp_ids )
    borders_indices   = np.append( np.insert( borders_indices, 0, 0), length )

    return borders_indices

def make_inner_func( fn, *args ):
    signature = void( *args )
    @jit(signature)
    def inner_func( result, comp_ids, data ):
        threadstate = savethread()
        fn( result, comp_ids, data )
        restorethread(threadstate)
    return inner_func

@jit(void(double[:], int64[:], double[:]))
def pgb_max( result, comp_ids, data ):

    i0 = comp_ids[0]
    i1 = comp_ids[1]
    result[i0:i1] = np.finfo( np.float ).min

    for i in range( len( comp_ids ) ):        
        cid = comp_ids[i]
        ai  = data[i]
        if ai>result[cid]:
            result[ cid ] = ai
    return result 

savethread = pythonapi.PyEval_SaveThread
savethread.argtypes = []
savethread.restype = c_void_p

restorethread = pythonapi.PyEval_RestoreThread
restorethread.argtypes = [c_void_p]
restorethread.restype = None

group_splits = get_group_splits( grps, numthreads )

n_length_args = (comp_ids,) + ( data, )

chunks = [[arg[ group_splits[i]:group_splits[i+1]] for arg in n_length_args]
          for i in range(numthreads)]

inner_func = make_inner_func( pgb_max, double[:], int64[:], double[:] )

result2    = np.ndarray( ngroups, dtype=df[fld].dtype)
result2[:] = np.finfo( result2.dtype ).min

threads = [ threading.Thread( target=inner_func, args=[result2] + chunks[i] ) for i in range(numthreads-1) ]

print( 'starting threads')
for thread in threads:
    thread.start()
    pass
print( 'on main thread')
inner_func( result2, *chunks[-1] )

print( 'joining')
for thread in threads:
    pass
    #thread.join()##

print( 'joined')

print( result2 )

你用过这个吗？我正在做类似的事情，而且很混乱