Python Can';使用mpirun和多处理捕获ctrl-c
好消息:用python3 test.py运行这个测试,然后按ctrl-c。它应该停下来 坏消息:使用Python Can';使用mpirun和多处理捕获ctrl-c,python,parallel-processing,multiprocessing,mpi,openmpi,Python,Parallel Processing,Multiprocessing,Mpi,Openmpi,好消息:用python3 test.py运行这个测试,然后按ctrl-c。它应该停下来 坏消息:使用mpirun-n1 python3 test.py运行此测试按ctrl-c。Ops,mpirun被终止,但multiprocessing.pool生成的所有python进程将永远存在。如何解决这个问题 test.py: from mpi4py import MPI import multiprocessing as mp import signal import time class Grace
mpirun-n1 python3 test.py运行此测试
按ctrl-c。Ops,mpirun被终止,但multiprocessing.pool生成的所有python进程将永远存在。如何解决这个问题
test.py:
from mpi4py import MPI
import multiprocessing as mp
import signal
import time
class GracefulKiller:
kill_now = False
def __init__(self):
signal.signal(signal.SIGINT, self.exit_gracefully)
signal.signal(signal.SIGTERM, self.exit_gracefully)
def exit_gracefully(self, signum, frame):
self.kill_now = True
print("I kill")
def worker(e):
killer = GracefulKiller()
while(True):
if killer.kill_now:
e.set()
if e.is_set():
return
def main():
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
killer = GracefulKiller()
with mp.Manager() as manager:
e = manager.Event()
pool = mp.Pool()
arg = []
for i in range(100):
arg += [e]
r = pool.map_async(worker, arg)
r.get()
pool.join()
pool.close()
if killer.kill_now:
e.set()
if e.is_set():
comm.Abort()
main()
from mpi4py import MPI
import multiprocessing as mp
import signal
import time
class GracefulKiller:
kill_now = False
def __init__(self, name, pool=None):
signal.signal(signal.SIGINT, self.exit_gracefully)
signal.signal(signal.SIGTERM, self.exit_gracefully)
self.name = name
self.pool = pool
def exit_gracefully(self, signum, frame):
self.kill_now = True
print("I kill.", self.name)
if self.pool is not None:
self.pool.close()
self.pool.terminate()
def worker(e):
killer = GracefulKiller('worker')
while(True):
if killer.kill_now:
e.set()
if e.is_set():
return
def main():
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
with mp.Manager() as manager:
e = manager.Event()
pool = mp.Pool()
killer = GracefulKiller('main', pool)
arg = []
for i in range(100):
arg += [e]
r = pool.map_async(worker, arg)
r.get()
pool.join()
pool.close()
if killer.kill_now:
e.set()
if e.is_set():
comm.Abort()
main()
GracefulKiller
来自
mpirun
来自openmpi。我在Ubuntu和CentOS上测试了这个
更新:
- 我添加了一行
。然后我再次尝试使用print(“I kill”)
。它只打印了一次mpirun
,但仍有一堆I kill
进程在运行python3
更新2:
- 为优雅杀手添加了一个名字
- 添加了
,当主进程捕获ctrl-c时,尝试杀死主进程的pool.terminate()
生成的所有进程pool
from mpi4py import MPI
import multiprocessing as mp
import signal
import time
class GracefulKiller:
kill_now = False
def __init__(self):
signal.signal(signal.SIGINT, self.exit_gracefully)
signal.signal(signal.SIGTERM, self.exit_gracefully)
def exit_gracefully(self, signum, frame):
self.kill_now = True
print("I kill")
def worker(e):
killer = GracefulKiller()
while(True):
if killer.kill_now:
e.set()
if e.is_set():
return
def main():
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
killer = GracefulKiller()
with mp.Manager() as manager:
e = manager.Event()
pool = mp.Pool()
arg = []
for i in range(100):
arg += [e]
r = pool.map_async(worker, arg)
r.get()
pool.join()
pool.close()
if killer.kill_now:
e.set()
if e.is_set():
comm.Abort()
main()
from mpi4py import MPI
import multiprocessing as mp
import signal
import time
class GracefulKiller:
kill_now = False
def __init__(self, name, pool=None):
signal.signal(signal.SIGINT, self.exit_gracefully)
signal.signal(signal.SIGTERM, self.exit_gracefully)
self.name = name
self.pool = pool
def exit_gracefully(self, signum, frame):
self.kill_now = True
print("I kill.", self.name)
if self.pool is not None:
self.pool.close()
self.pool.terminate()
def worker(e):
killer = GracefulKiller('worker')
while(True):
if killer.kill_now:
e.set()
if e.is_set():
return
def main():
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
with mp.Manager() as manager:
e = manager.Event()
pool = mp.Pool()
killer = GracefulKiller('main', pool)
arg = []
for i in range(100):
arg += [e]
r = pool.map_async(worker, arg)
r.get()
pool.join()
pool.close()
if killer.kill_now:
e.set()
if e.is_set():
comm.Abort()
main()
- python3 test.py(然后是ctrl-c):
I kill. worker/main ...
...
File "test.py", line 20, in exit_gracefully
self.pool.terminate()
File "/usr/lib/python3.5/multiprocessing/pool.py", line 505, in terminate
self._terminate()
File "/usr/lib/python3.5/multiprocessing/util.py", line 186, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/usr/lib/python3.5/multiprocessing/pool.py", line 535, in _terminate_pool
...
I kill. worker/main ...
- mpirun-n 1 python3 test.py(然后ctrl-c):
,一堆python3进程仍在运行不打印任何内容
可能有帮助的事情:
有什么建议吗?主进程接收到信号,但孩子们没有;在MPI下,它们可以在不同的主机上运行。我怀疑
comm.Abort
不是发送终止信号的正确方式,或者mpirun
使用不同的信号终止子进程SIGINT
和SIGTERM
在这方面有所不同,但是Ctrl+C发送SIGINT。你是对的。只有主进程的优雅杀手才能捕捉到ctrl-c。子进程没有做任何事情。仍然找不到修复方法。@你有没有发现这个问题?我也有同样的问题,这很令人沮丧。