Multithreading 为什么这个多线程脚本会冻结?
我试图在这里修改最后一个示例: 实现多线程groupby max函数 到目前为止,我所做的工作如下。在IPython会话中,它开始打印“在主线程上”,但在这之后,当我尝试输入新命令时,控制台所做的一切都是打印输出“控制台已退出,值为:-1073741819,正在等待回答。” 如何使穿线正确工作 我有numba。版本0.15.1Multithreading 为什么这个多线程脚本会冻结?,multithreading,python-3.x,ipython,numba,Multithreading,Python 3.x,Ipython,Numba,我试图在这里修改最后一个示例: 实现多线程groupby max函数 到目前为止,我所做的工作如下。在IPython会话中,它开始打印“在主线程上”,但在这之后,当我尝试输入新命令时,控制台所做的一切都是打印输出“控制台已退出,值为:-1073741819,正在等待回答。” 如何使穿线正确工作 我有numba。版本0.15.1 import numpy as np import pandas as pd from numba import void, double, jit,int64 im
import numpy as np
import pandas as pd
from numba import void, double, jit,int64
import threading
from ctypes import pythonapi, c_void_p
N = 1e1
m = 3
p = 3
x = np.random.randint( 0, m, N )
y = np.random.randint( 0, p, N )
z = np.random.randn(N)
key = [ 'x', 'y' ]
df = pd.DataFrame( {'x':x, 'y':y, 'z':z} )
df = df.sort( key )
grps = df.groupby( key )
print( df.head() )
fld = 'z'
comp_ids, _, ngroups = grps.grouper.group_info
data = grps.obj[ fld ].values
length = len( data )
numthreads = 2
def get_group_splits( grps, num_splits ):
comp_ids, _, ngroups = grps.grouper.group_info
length = len( comp_ids )
arrs = np.array_split( comp_ids, num_splits )
borders_comp_ids = [a[0] for a in arrs[1:]]
borders_indices = np.searchsorted( comp_ids, borders_comp_ids )
borders_indices = np.append( np.insert( borders_indices, 0, 0), length )
return borders_indices
def make_inner_func( fn, *args ):
signature = void( *args )
@jit(signature)
def inner_func( result, comp_ids, data ):
threadstate = savethread()
fn( result, comp_ids, data )
restorethread(threadstate)
return inner_func
@jit(void(double[:], int64[:], double[:]))
def pgb_max( result, comp_ids, data ):
i0 = comp_ids[0]
i1 = comp_ids[1]
result[i0:i1] = np.finfo( np.float ).min
for i in range( len( comp_ids ) ):
cid = comp_ids[i]
ai = data[i]
if ai>result[cid]:
result[ cid ] = ai
return result
savethread = pythonapi.PyEval_SaveThread
savethread.argtypes = []
savethread.restype = c_void_p
restorethread = pythonapi.PyEval_RestoreThread
restorethread.argtypes = [c_void_p]
restorethread.restype = None
group_splits = get_group_splits( grps, numthreads )
n_length_args = (comp_ids,) + ( data, )
chunks = [[arg[ group_splits[i]:group_splits[i+1]] for arg in n_length_args]
for i in range(numthreads)]
inner_func = make_inner_func( pgb_max, double[:], int64[:], double[:] )
result2 = np.ndarray( ngroups, dtype=df[fld].dtype)
result2[:] = np.finfo( result2.dtype ).min
threads = [ threading.Thread( target=inner_func, args=[result2] + chunks[i] ) for i in range(numthreads-1) ]
print( 'starting threads')
for thread in threads:
thread.start()
pass
print( 'on main thread')
inner_func( result2, *chunks[-1] )
print( 'joining')
for thread in threads:
pass
#thread.join()##
print( 'joined')
print( result2 )
你用过这个吗?我正在做类似的事情,而且很混乱