Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/multithreading/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
2017版:10.000个python并行HTTP请求_Python_Multithreading_Parallel Processing_Request_Httpresponse - Fatal编程技术网

2017版:用Python并行发送10,000个HTTP请求

2017版:10.000个python并行HTTP请求,python,multithreading,parallel-processing,request,httpresponse,Python,Multithreading,Parallel Processing,Request,Httpresponse,我知道这个问题已经被问了好几次了,但请进一步阅读 关于我的问题的小背景 我想得到10000个url的回复。然而,最大的问题是每个页面的加载时间。也就是说,对于每个页面,服务器端都有一个大的进程(~20-30)分钟。当然,我可以同步地做这件事,然后明年告诉你花了多长时间;)但这并不是一个真正的选择,所以我在stack overflow和internet上搜索,找到了一些“答案” 即使只使用200个请求,我的CPU也达到了100%(可能是因为等待时间太长了吧?) 如果比您想要的要多得多,那么

我知道这个问题已经被问了好几次了,但请进一步阅读 关于我的问题的小背景
我想获取10000个URL的响应。然而,最大的问题是每个页面的加载时间:对于每个页面,服务器端都要运行一个耗时很长的处理过程(约20-30分钟)。当然,我可以同步地逐个请求,然后明年再告诉你花了多长时间;)但这并不是一个真正可行的选择,所以我在Stack Overflow和互联网上搜索,找到了一些“答案”



即使只使用200个请求,我的CPU也达到了100%(可能是因为等待时间太长了吧?)



如果您需要的并发量远不止这些,那么应使用异步IO,而不是线程。requests + gevent = grequests

这对我来说似乎很好,但grequests似乎有点不受欢迎(如果你感兴趣,请阅读此答案上的评论)。但至少我很确定我必须选择异步而不是多线程。所以我开始在网上搜索



这对我来说似乎是个不错的选择,所以我调整了代码并进行了测试。我面临的问题是:


在前200个请求之内,我能看到如下打印输出:

201. Thu, 28 Sep 2017 07:32:18 GMT:http://api.metagenomics.anl.gov/annotation/sequence/mgm4484960.3?source=RDP with delay None
但在这之后,要等很长时间才会打印出其余的结果

似乎不成比例
当我尝试时,200个请求将需要约2小时,而一个请求只需要20-30分钟



当前代码: 基于滚动条,代码看起来很长,但大部分是URL;)


最后,问题来了
在2017年,发送大量并行HTTP请求的最佳/最高效方式是什么?

顺便问一下,如果单个请求就重到服务器需要20-30分钟才能响应,那么它要如何处理数千个异步请求?我想这确实是个问题,哈哈。@roganjosh,但我不太明白你在问什么,它并不在乎服务器端需要多长时间,不是吗?除了服务器允许的请求数限制之外,如果处理一个请求需要那么长的时间,那么可以推测服务器上运行着受CPU或内存限制的进程来计算响应。服务器的资源仍然是有限的,因此即使您可以发送大量请求,也不能保证它能同时响应所有请求;这就是多个请求需要2小时才能得到响应的原因——这不是异步库的问题,而是服务器实际处理请求的速度受到了限制。啊,感谢您的解释。有没有办法验证这是由服务器端引起的?@我不确定。它是在内部服务器上吗?
lib: grequests
approach: async
lib: asyncio
approach: async
201. Thu, 28 Sep 2017 07:32:18 GMT:http://api.metagenomics.anl.gov/annotation/sequence/mgm4484960.3?source=RDP with delay None
import asyncio
import time
from aiohttp import ClientPayloadError
from aiohttp import ClientSession

# Module-level running number for progress output: fetch() increments it and
# then prints it in front of each response line.
COUNTER = 1

async def fetch(url, session):
    """GET ``url`` through ``session``, print a progress line, return the body.

    Parameters:
        url: address to request.
        session: object whose ``get(url)`` is an async context manager that
            yields a response exposing ``headers``, ``url`` and an awaitable
            ``text()`` (an aiohttp ``ClientSession`` in this script).

    Returns:
        The response body as text, or ``None`` when reading the body raises
        ``ClientPayloadError``.
    """
    global COUNTER

    async with session.get(url) as response:
        delay = response.headers.get("DELAY")
        date = response.headers.get("DATE")
        # The counter is incremented before it is printed, so the number shown
        # is the value after counting this response.
        COUNTER += 1
        print("{}. {}:{} with delay {}".format(str(COUNTER), date, response.url, delay))
        try:
            return await response.text()
        except ClientPayloadError:
            # The message previously had no "{}" placeholder, so the failing
            # URL was silently left out of the log line.
            print("ERROR: {}".format(url))
            return None


async def bound_fetch(sem, url, session):
    """Call ``fetch(url, session)`` while holding ``sem`` and return its result.

    Parameters:
        sem: async context manager used as the concurrency gate (an
            ``asyncio.Semaphore`` in this script).
        url: address passed through to ``fetch``.
        session: client session passed through to ``fetch``.

    Returns:
        Whatever ``fetch`` returns. Previously the value was awaited and then
        dropped, so every task resolved to ``None``.
    """
    async with sem:
        return await fetch(url, session)


async def run(max_concurrency=1000):
    """Request every URL built from ``load_ids()`` concurrently.

    Parameters:
        max_concurrency: size of the ``asyncio.Semaphore`` that gates the
            requests. Defaults to 1000, the value that used to be hard-coded.

    Returns:
        A list with one entry per URL, in the same order as the ids, holding
        whatever ``bound_fetch`` returned for that URL. Previously the
        gathered results were awaited and then discarded.
    """
    urls = [build_url(sample_id) for sample_id in load_ids()]
    # One semaphore shared by all tasks caps the number of requests in flight.
    sem = asyncio.Semaphore(max_concurrency)

    # A single client session is shared so that a new connection pool is not
    # opened for each request.
    async with ClientSession(conn_timeout=10000, read_timeout=10000) as session:
        # Schedule every request up front; the semaphore inside bound_fetch
        # decides when each one may actually start.
        tasks = [
            asyncio.ensure_future(bound_fetch(sem, url, session))
            for url in urls
        ]
        # Await inside the ``async with`` so the session stays open until all
        # requests have finished.
        return await asyncio.gather(*tasks)

def build_url(id):
    """Return the annotation/sequence URL (source=RDP) for the given id."""
    prefix = 'http://api.metagenomics.anl.gov/annotation/sequence/'
    suffix = '?source=RDP'
    # format(id) renders the id exactly as a bare "{}" placeholder would.
    return ''.join((prefix, format(id), suffix))

def load_ids():
    """Return the embedded ids as a list of strings, in listing order.

    The ids are hard-coded here, standing in for a file read, so that the
    script is reproducible on its own.
    """
    embedded_ids = """
mgm4558908.3
mgm4484962.3
mgm4734169.3
mgm4558911.3
mgm4484983.3
mgm4558918.3
mgm4735453.3
mgm4735450.3
mgm4558931.3
mgm4734170.3
mgm4524851.3
mgm4485066.3
mgm4484952.3
mgm4484985.3
mgm4485062.3
mgm4484969.3
mgm4485024.3
mgm4485021.3
mgm4485061.3
mgm4485070.3
mgm4449604.3
mgm4510361.3
mgm4558912.3
mgm4485034.3
mgm4484996.3
mgm4524848.3
mgm4485035.3
mgm4734167.3
mgm4485001.3
mgm4735458.3
mgm4484948.3
mgm4508946.3
mgm4484973.3
mgm4485009.3
mgm4453148.3
mgm4485064.3
mgm4510364.3
mgm4547279.3
mgm4510360.3
mgm4484964.3
mgm4453150.3
mgm4548349.3
mgm4484975.3
mgm4558909.3
mgm4484945.3
mgm4734166.3
mgm4572199.3
mgm4485053.3
mgm4558915.3
mgm4485020.3
mgm4445996.3
mgm4484990.3
mgm4485052.3
mgm4485055.3
mgm4485029.3
mgm4558903.3
mgm4558924.3
mgm4735452.3
mgm4485392.3
mgm4484984.3
mgm4558937.3
mgm4485390.3
mgm4523756.3
mgm4485397.3
mgm4485044.3
mgm4484994.3
mgm4485399.3
mgm4485040.3
mgm4558904.3
mgm4558907.3
mgm4485037.3
mgm4485073.3
mgm4484981.3
mgm4525982.3
mgm4558897.3
mgm4547780.3
mgm4485022.3
mgm4524847.3
mgm4484959.3
mgm4558921.3
mgm4485067.3
mgm4484956.3
mgm4558902.3
mgm4558914.3
mgm4735454.3
mgm4453147.3
mgm4484991.3
mgm4484999.3
mgm4558935.3
mgm4485010.3
mgm4485393.3
mgm4558922.3
mgm4558938.3
mgm4508944.3
mgm4544122.3
mgm4485403.3
mgm4734172.3
mgm4735455.3
mgm4558926.3
mgm4558901.3
mgm4484982.3
mgm4485389.3
mgm4485018.3
mgm4558913.3
mgm4485398.3
mgm4734171.3
mgm4558939.3
mgm4485045.3
mgm4485058.3
mgm4572197.3
mgm4523758.3
mgm4484997.3
mgm4445993.3
mgm4484963.3
mgm4484989.3
mgm4485017.3
mgm4735448.3
mgm4485008.3
mgm4485395.3
mgm4547281.3
mgm4510366.3
mgm4485391.3
mgm4558900.3
mgm4558910.3
mgm4484957.3
mgm4558929.3
mgm4485014.3
mgm4485059.3
mgm4735459.3
mgm4525983.3
mgm4485032.3
mgm4485075.3
mgm4547285.3
mgm4547282.3
mgm4485016.3
mgm4484968.3
mgm4485030.3
mgm4484960.3
mgm4763691.3
mgm4558928.3
mgm4484987.3
mgm4734173.3
mgm4485012.3
mgm4484967.3
mgm4485054.3
mgm4485063.3
mgm4523757.3
mgm4485041.3
mgm4558919.3
mgm4734165.3
mgm4485069.3
mgm4484965.3
mgm4547280.3
mgm4484986.3
mgm4508945.3
mgm4523769.3
mgm4485004.3
mgm4524852.3
mgm4558923.3
mgm4485048.3
mgm4485049.3
mgm4485056.3
mgm4484980.3
mgm4734168.3
mgm4485031.3
mgm4558917.3
mgm4558898.3
mgm4735451.3
mgm4508948.3
mgm4484992.3
mgm4484950.3
mgm4485028.3
mgm4510365.3
mgm4485038.3
mgm4558936.3
mgm4485396.3
mgm4485050.3
mgm4510359.3
mgm4485036.3
mgm4558916.3
mgm4484966.3
mgm4485039.3
mgm4558934.3
mgm4445994.3
mgm4523754.3
mgm4484947.3
mgm4524849.3
mgm4484946.3
mgm4485015.3
mgm4524846.3
mgm4485043.3
mgm4476876.3
mgm4485033.3
mgm4524853.3
mgm4523770.3
mgm4485000.3
mgm4485025.3
mgm4453149.3
mgm4558940.3
mgm4484958.3
mgm4484988.3
mgm4485071.3
mgm4484995.3
mgm4485007.3
mgm4485005.3
mgm4735456.3
mgm4485013.3
mgm4484955.3
mgm4485065.3
mgm4558941.3
mgm4510367.3
mgm4485400.3
mgm4484970.3
mgm4558925.3
mgm4558932.3
mgm4485074.3
mgm4484961.3
mgm4484976.3
mgm4572123.3
mgm4558920.3
mgm4548350.3
mgm4485047.3
mgm4558927.3
mgm4485401.3
mgm4484978.3
mgm4485027.3
mgm4547284.3
mgm4559623.3
mgm4503838.3
mgm4485019.3
mgm4484951.3
mgm4485002.3
mgm4484972.3
mgm4485394.3
mgm4572198.3
mgm4558933.3
mgm4523755.3
mgm4484949.3
mgm4558905.3
mgm4524850.3
mgm4558930.3
mgm4510363.3
mgm4485003.3
mgm4484979.3
mgm4485072.3
mgm4484971.3
mgm4485046.3
mgm4485068.3
mgm4485060.3
mgm4476877.3
mgm4484974.3
mgm4735449.3
mgm4485051.3
mgm4558906.3
mgm4735457.3
mgm4525984.3
mgm4503837.3
mgm4445990.3
mgm4508947.3
mgm4485026.3
mgm4485057.3
mgm4484993.3
mgm4510362.3
mgm4485023.3
mgm4485042.3
mgm4484954.3
mgm4485402.3
mgm4484953.3
mgm4523771.3
mgm4485006.3
mgm4572122.3
mgm4547283.3
mgm4558899.3
mgm4524854.3
"""
    # A bare split() breaks on any run of whitespace and ignores the leading
    # and trailing newlines, leaving exactly one entry per listed id.
    return embedded_ids.split()



# Script entry point: drive run() to completion on a dedicated event loop and
# report the elapsed wall-clock time in minutes.
#
# time.monotonic() replaces time.clock(), which was removed in Python 3.8 and
# measured CPU time on Unix rather than the time spent waiting on the network.
start = time.monotonic()
# Create the loop explicitly instead of relying on asyncio.get_event_loop()
# to create one implicitly, which newer Python versions no longer do.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    future = loop.create_task(run())
    loop.run_until_complete(future)
finally:
    loop.close()
# Elapsed time is "now minus start"; the operands were previously reversed,
# which produced a negative duration.
run_time = (time.monotonic() - start) / 60
print("this took: {} minutes".format(run_time))