Python "IOError: cannot watch more than 1024 sockets"(使用线程时)

Python "IOError: cannot watch more than 1024 sockets"(使用线程时),python,python-2.7,Python,Python 2.7,我在Python2.7.13上运行线程脚本时遇到了一些奇怪的问题。有时整个python.exe只是崩溃,没有错误消息,有时脚本只是卡住并停止运行,但有时我实际上收到了错误消息 Exception in thread Thread-370: Traceback (most recent call last): File "C:\Python27\lib\threading.py", line 801, in __bootstrap_inner self.

我在Python 2.7.13上运行多线程脚本时遇到了一些奇怪的问题。有时整个python.exe直接崩溃,没有任何错误消息;有时脚本只是卡住并停止运行;但有时我确实会收到如下错误消息:

    Exception in thread Thread-370:
    Traceback (most recent call last):
      File "C:\Python27\lib\threading.py", line 801, in __bootstrap_inner
        self.run()
      File "C:\Python27\lib\threading.py", line 754, in run
        self.__target(*self.__args, **self.__kwargs)
      File ".\1024.py", line 38, in loadData
        result = play_scraper.similar(app_id, results=60)
      File "C:\Python27\lib\site-packages\play_scraper\api.py", line 92, in similar
        return s.similar(app_id, **kwargs)
      File "C:\Python27\lib\site-packages\play_scraper\scraper.py", line 435, in similar
        response = send_request('GET', url)
      File "C:\Python27\lib\site-packages\play_scraper\utils.py", line 128, in send_request
        verify=verify)
      File "C:\Python27\lib\site-packages\requests\sessions.py", line 501, in get
        return self.request('GET', url, **kwargs)
      File "C:\Python27\lib\site-packages\requests\sessions.py", line 488, in request
        resp = self.send(prep, **send_kwargs)
      File "C:\Python27\lib\site-packages\requests\sessions.py", line 609, in send
        r = adapter.send(request, **kwargs)
      File "C:\Python27\lib\site-packages\requests\adapters.py", line 423, in send
        timeout=timeout
      File "C:\Python27\lib\site-packages\requests\packages\urllib3\connectionpool.py", line 594, in urlopen
        chunked=chunked)
      File "C:\Python27\lib\site-packages\requests\packages\urllib3\connectionpool.py", line 350, in _make_request
        self._validate_conn(conn)
      File "C:\Python27\lib\site-packages\requests\packages\urllib3\connectionpool.py", line 835, in _validate_conn
        conn.connect()
      File "C:\Python27\lib\site-packages\requests\packages\urllib3\connection.py", line 281, in connect
        conn = self._new_conn()
      File "C:\Python27\lib\site-packages\requests\packages\urllib3\connection.py", line 138, in _new_conn
        (self.host, self.port), self.timeout, **extra_kw)
      File "C:\Python27\lib\site-packages\requests\packages\urllib3\util\connection.py", line 79, in create_connection
        sock = socket.socket(af, socktype, proto)
      File "C:\Python27\lib\site-packages\gevent\_socket2.py", line 124, in __init__
        self._read_event = io(fileno, 1)
      File "gevent.libev.corecext.pyx", line 487, in gevent.libev.corecext.loop.io (src/gevent/libev/gevent.corecext.c:6680)
      File "gevent.libev.corecext.pyx", line 835, in gevent.libev.corecext.io.__init__ (src/gevent/libev/gevent.corecext.c:11088)
    IOError: cannot watch more than 1024 sockets
我的脚本是这样的

import requests
from threading import Thread
import play_scraper

# Progress counter; loadData increments it after each lookup.
checkedIds = 0

# Load the app ids to check, one per line of apps.txt.
with open('apps.txt') as id_file:
    raw_ids = id_file.read()
    app_idList = raw_ids.splitlines()

def safe_print(content):
    print "{0}\n".format(content),

def loadData(threadName, app_id):
    """Log progress, look up the apps similar to ``app_id``, bump the counter.

    threadName -- label placed at the start of the progress line.
    app_id     -- application id handed to ``play_scraper.similar``.

    The lookup result is neither used nor returned.  The module-level
    ``checkedIds`` counter is incremented once the lookup has returned.
    """
    global checkedIds

    progress = threadName + str(checkedIds)
    progress = progress + " Checking similar apps to " + app_id
    safe_print(progress)

    # results=60 is passed straight through to play_scraper.
    similar_apps = play_scraper.similar(app_id, results=60)

    checkedIds = checkedIds + 1

# One thread is created per app id.  Because join() is called right after
# start(), each thread finishes before the next one is created.
for app_id in app_idList:
    worker_args = ("Thread #0: ", app_id)
    worker = Thread(target=loadData, args=worker_args)
    worker.start()
    worker.join()
def send_request(method, url, data=None, params=None, headers=None, verify=True):
    """Send an HTTP request through ``requests`` and return the response.

    Arguments left as ``None`` are filled in first: ``data`` and ``params``
    become empty dicts and ``headers`` comes from ``default_headers()``.
    A 'POST' whose ``data`` is empty takes its body from
    ``generate_post_data()``.

    Any ``requests.exceptions.RequestException`` is logged through
    ``log.error`` and then re-raised unchanged.
    """
    if data is None:
        data = {}
    if params is None:
        params = {}
    if headers is None:
        headers = default_headers()
    if not data and method == 'POST':
        data = generate_post_data()

    request_kwargs = dict(
        method=method,
        url=url,
        data=data,
        params=params,
        headers=headers,
        verify=verify,
    )

    try:
        response = requests.request(**request_kwargs)
        if response.status_code != requests.codes.ok:
            # raise_for_status() decides whether this status is an error;
            # if it does not raise, the response is still returned.
            response.raise_for_status()
    except requests.exceptions.RequestException as err:
        log.error(err)
        raise

    return response
经过大约365-375次循环后,我得到了上面的错误消息。我正在为我的项目使用play_scraper模块,有问题的代码如下所示

import requests
from threading import Thread
import play_scraper

# Progress counter; loadData increments it after each lookup.
checkedIds = 0

# Load the app ids to check, one per line of apps.txt.
with open('apps.txt') as id_file:
    raw_ids = id_file.read()
    app_idList = raw_ids.splitlines()

def safe_print(content):
    print "{0}\n".format(content),

def loadData(threadName, app_id):
    """Log progress, look up the apps similar to ``app_id``, bump the counter.

    threadName -- label placed at the start of the progress line.
    app_id     -- application id handed to ``play_scraper.similar``.

    The lookup result is neither used nor returned.  The module-level
    ``checkedIds`` counter is incremented once the lookup has returned.
    """
    global checkedIds

    progress = threadName + str(checkedIds)
    progress = progress + " Checking similar apps to " + app_id
    safe_print(progress)

    # results=60 is passed straight through to play_scraper.
    similar_apps = play_scraper.similar(app_id, results=60)

    checkedIds = checkedIds + 1

# One thread is created per app id.  Because join() is called right after
# start(), each thread finishes before the next one is created.
for app_id in app_idList:
    worker_args = ("Thread #0: ", app_id)
    worker = Thread(target=loadData, args=worker_args)
    worker.start()
    worker.join()
def send_request(method, url, data=None, params=None, headers=None, verify=True):
    """Send an HTTP request through ``requests`` and return the response.

    Arguments left as ``None`` are filled in first: ``data`` and ``params``
    become empty dicts and ``headers`` comes from ``default_headers()``.
    A 'POST' whose ``data`` is empty takes its body from
    ``generate_post_data()``.

    Any ``requests.exceptions.RequestException`` is logged through
    ``log.error`` and then re-raised unchanged.
    """
    if data is None:
        data = {}
    if params is None:
        params = {}
    if headers is None:
        headers = default_headers()
    if not data and method == 'POST':
        data = generate_post_data()

    request_kwargs = dict(
        method=method,
        url=url,
        data=data,
        params=params,
        headers=headers,
        verify=verify,
    )

    try:
        response = requests.request(**request_kwargs)
        if response.status_code != requests.codes.ok:
            # raise_for_status() decides whether this status is an error;
            # if it does not raise, the response is still returned.
            response.raise_for_status()
    except requests.exceptions.RequestException as err:
        log.error(err)
        raise

    return response
我在某个地方读到,这个问题可能是由于连接套接字一直保持打开、没有被关闭造成的,使用会话(Session)可以解决这个问题。我将该函数改成了下面这样,但问题依然存在:

def send_request(method, url, data=None, params=None, headers=None, verify=True):
    """Send an HTTP request through a short-lived ``requests.Session``.

    Arguments left as ``None`` are filled in first: ``data`` and ``params``
    become empty dicts and ``headers`` comes from ``default_headers()``.
    A 'POST' whose ``data`` is empty takes its body from
    ``generate_post_data()``.

    The session is used as a context manager, so it is closed whether the
    request succeeds or raises.  ``method`` is passed through to
    ``Session.request`` instead of being collapsed to POST-or-GET.

    Returns the ``requests`` response object.

    Raises ``requests.exceptions.RequestException`` (after logging it with
    ``log.error``) when the request fails or ``raise_for_status()`` rejects
    the status code.
    """
    data = {} if data is None else data
    params = {} if params is None else params
    headers = default_headers() if headers is None else headers
    if not data and method == 'POST':
        data = generate_post_data()

    try:
        # ``with`` ties the close to the session's creation: there is no
        # ``finally`` that could hit an unbound name if Session() itself
        # failed.
        with requests.Session() as s:
            response = s.request(
                method=method,
                url=url,
                data=data,
                params=params,
                headers=headers,
                verify=verify)
            if not response.status_code == requests.codes.ok:
                response.raise_for_status()
    except requests.exceptions.RequestException as e:
        log.error(e)
        raise

    return response
如果我运行一个不使用线程的简单循环脚本,一切都会正常运行:

    import play_scraper


    with open('apps.txt') as f:
        app_idList = f.read().splitlines()

    checkedIds = 0

    for app_id in app_idList:
        print str(checkedIds ) + " Checking similar apps to " + app_id
        result = play_scraper.similar(app_id, results=60)
        checkedIds += 1
可在此处找到 play_scraper 模块


如何解决此问题?

您可能只需要增加系统上的打开文件描述符限制。有关更多信息,请参见此处:-Linux上的默认值通常是1024。@FadySad我在Windows 10 64BIT上运行它。我只是告诉您Linux中的默认值是1024,请检查Windows上的对应值。1024个FD已经很多了,但它甚至没有接近操作系统所能支持的值(在我的情况下,Ux:-~200k)。检查。但是我没有注意到线程,当谈到Python时,GIL是一个可怕的首字母缩略词。你不应该在脚本的顶层进行任何处理,因为Windows没有fork。将所有内容放入main函数中。参见