Python:使用线程时出现 "IOError: cannot watch more than 1024 sockets"(不能监视超过 1024 个套接字)
标签:python, python-2.7。我在 Python 2.7.13 上运行多线程脚本时遇到了一些奇怪的问题。有时整个 python.exe 直接崩溃,没有任何错误消息;有时脚本只是挂起并停止运行;但有时我确实会收到如下错误消息:
Exception in thread Thread-370:
Traceback (most recent call last):
File "C:\Python27\lib\threading.py", line 801, in __bootstrap_inner
self.run()
File "C:\Python27\lib\threading.py", line 754, in run
self.__target(*self.__args, **self.__kwargs)
File ".\1024.py", line 38, in loadData
result = play_scraper.similar(app_id, results=60)
File "C:\Python27\lib\site-packages\play_scraper\api.py", line 92, in similar
return s.similar(app_id, **kwargs)
File "C:\Python27\lib\site-packages\play_scraper\scraper.py", line 435, in similar
response = send_request('GET', url)
File "C:\Python27\lib\site-packages\play_scraper\utils.py", line 128, in send_request
verify=verify)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 501, in get
return self.request('GET', url, **kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 488, in request
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 609, in send
r = adapter.send(request, **kwargs)
File "C:\Python27\lib\site-packages\requests\adapters.py", line 423, in send
timeout=timeout
File "C:\Python27\lib\site-packages\requests\packages\urllib3\connectionpool.py", line 594, in urlopen
chunked=chunked)
File "C:\Python27\lib\site-packages\requests\packages\urllib3\connectionpool.py", line 350, in _make_request
self._validate_conn(conn)
File "C:\Python27\lib\site-packages\requests\packages\urllib3\connectionpool.py", line 835, in _validate_conn
conn.connect()
File "C:\Python27\lib\site-packages\requests\packages\urllib3\connection.py", line 281, in connect
conn = self._new_conn()
File "C:\Python27\lib\site-packages\requests\packages\urllib3\connection.py", line 138, in _new_conn
(self.host, self.port), self.timeout, **extra_kw)
File "C:\Python27\lib\site-packages\requests\packages\urllib3\util\connection.py", line 79, in create_connection
sock = socket.socket(af, socktype, proto)
File "C:\Python27\lib\site-packages\gevent\_socket2.py", line 124, in __init__
self._read_event = io(fileno, 1)
File "gevent.libev.corecext.pyx", line 487, in gevent.libev.corecext.loop.io (src/gevent/libev/gevent.corecext.c:6680)
File "gevent.libev.corecext.pyx", line 835, in gevent.libev.corecext.io.__init__ (src/gevent/libev/gevent.corecext.c:11088)
IOError: cannot watch more than 1024 sockets
我的脚本是这样的
import requests
from threading import Thread
import play_scraper
with open('apps.txt') as f:
app_idList = f.read().splitlines()
checkedIds = 0
def safe_print(content):
print "{0}\n".format(content),
def loadData (threadName,app_id):
global checkedIds
safe_print(threadName + str(checkedIds) + " Checking similar apps to " + app_id)
result = play_scraper.similar(app_id, results=60)
checkedIds += 1
for app_id in app_idList:
t = Thread(target=loadData, args=("Thread #0: ",app_id))
t.start()
t.join()
def send_request(method, url, data=None, params=None, headers=None, verify=True):
    """Issue an HTTP request through ``requests`` and return the response.

    Arguments left as ``None`` are filled in first: ``data`` and ``params``
    become empty dicts and ``headers`` comes from ``default_headers()``.
    A POST without a payload takes its body from ``generate_post_data()``.

    Any ``requests.exceptions.RequestException`` raised by the request or by
    ``raise_for_status()`` is logged with ``log.error`` and re-raised.
    """
    if data is None:
        data = {}
    if params is None:
        params = {}
    if headers is None:
        headers = default_headers()

    if method == 'POST' and not data:
        data = generate_post_data()

    request_kwargs = dict(
        method=method,
        url=url,
        data=data,
        params=params,
        headers=headers,
        verify=verify,
    )
    try:
        response = requests.request(**request_kwargs)
        if response.status_code != requests.codes.ok:
            response.raise_for_status()
    except requests.exceptions.RequestException as err:
        log.error(err)
        raise
    return response
经过大约365-375次循环后,我得到了上面的错误消息。我正在为我的项目使用play_scraper模块,有问题的代码如下所示
import requests
from threading import Thread
import play_scraper
with open('apps.txt') as f:
app_idList = f.read().splitlines()
checkedIds = 0
def safe_print(content):
print "{0}\n".format(content),
def loadData (threadName,app_id):
global checkedIds
safe_print(threadName + str(checkedIds) + " Checking similar apps to " + app_id)
result = play_scraper.similar(app_id, results=60)
checkedIds += 1
for app_id in app_idList:
t = Thread(target=loadData, args=("Thread #0: ",app_id))
t.start()
t.join()
def send_request(method, url, data=None, params=None, headers=None, verify=True):
    """Issue an HTTP request through ``requests`` and return the response.

    Arguments left as ``None`` are filled in first: ``data`` and ``params``
    become empty dicts and ``headers`` comes from ``default_headers()``.
    A POST without a payload takes its body from ``generate_post_data()``.

    Any ``requests.exceptions.RequestException`` raised by the request or by
    ``raise_for_status()`` is logged with ``log.error`` and re-raised.
    """
    if data is None:
        data = {}
    if params is None:
        params = {}
    if headers is None:
        headers = default_headers()

    if method == 'POST' and not data:
        data = generate_post_data()

    request_kwargs = dict(
        method=method,
        url=url,
        data=data,
        params=params,
        headers=headers,
        verify=verify,
    )
    try:
        response = requests.request(**request_kwargs)
        if response.status_code != requests.codes.ok:
            response.raise_for_status()
    except requests.exceptions.RequestException as err:
        log.error(err)
        raise
    return response
我在某个地方读到,这个问题可能是由于连接套接字打开造成的,使用会话可以解决这个问题。我将该函数编辑为以下内容,但仍然存在相同的问题
def send_request(method, url, data=None, params=None, headers=None, verify=True):
    """Send an HTTP request on a short-lived ``requests.Session``.

    Arguments left as ``None`` are filled in first: ``data`` and ``params``
    become empty dicts and ``headers`` comes from ``default_headers()``.
    A POST without a payload takes its body from ``generate_post_data()``.

    The request is sent with the HTTP method the caller asked for, and the
    session is closed on every exit path.

    Returns the ``requests`` response object.

    Raises ``requests.exceptions.RequestException`` (after logging it with
    ``log.error``) when the request fails or ``raise_for_status()`` rejects
    the status code.
    """
    data = {} if data is None else data
    params = {} if params is None else params
    headers = default_headers() if headers is None else headers
    if not data and method == 'POST':
        data = generate_post_data()

    try:
        # The ``with`` block closes the session even when the request raises,
        # and never touches an unbound name if ``Session()`` itself fails.
        with requests.Session() as session:
            # ``session.request`` honours ``method`` directly; a POST/else
            # split would silently send every other method as GET.
            response = session.request(
                method=method,
                url=url,
                data=data,
                params=params,
                headers=headers,
                verify=verify)
            if response.status_code != requests.codes.ok:
                response.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(e)
        raise
    return response
如果我运行一个简单的循环脚本,一切都会正常运行
import play_scraper
with open('apps.txt') as f:
app_idList = f.read().splitlines()
checkedIds = 0
for app_id in app_idList:
print str(checkedIds ) + " Checking similar apps to " + app_id
result = play_scraper.similar(app_id, results=60)
checkedIds += 1
play_scraper 模块可在此处找到
如何解决此问题?
评论:您可能只需要增加系统上的打开文件描述符限制。有关更多信息,请参见此处。Linux 上的默认值通常是 1024。
评论:@FadySad 我是在 Windows 10 64 位上运行的。
评论:我只是告诉您 Linux 中的默认值是 1024,请检查 Windows 上的情况。
评论:1024 个文件描述符(FD)已经很多了,但还远未接近操作系统所能支持的上限(在我的情况下,Ux 上约为 20 万)。请检查一下。不过我之前没有注意到线程;说到 Python,GIL 是一个可怕的首字母缩略词。
评论:您不应该在脚本的顶层进行任何处理,因为 Windows 没有 fork。请将所有内容放入 main 函数中。参见