问题：用 Python 从 URL 下载巨大的 CSV 文件，保存的文件不完整。
我有如下代码，用于下载存储在 .gz 档案中的巨大 CSV 文件：
import asyncio
import re
import zlib
import aiohttp
from aiohttp import ClientTimeout
from aiohttp.client_exceptions import InvalidURL
# Whole-request deadline shared by every download: abort any transfer that
# takes more than 600 s (10 min) in total — a likely cause of truncated files
# when an archive cannot finish downloading within this budget.
timeout = ClientTimeout(total=600)
async def download(link, session, sem):
    """Stream one .gz archive from the server and write the decompressed CSV.

    Args:
        link: server-side path of the archive, e.g. ``'test/1.csv.gz'``.
        session: shared ``aiohttp.ClientSession`` (carries auth and timeout).
        sem: ``asyncio.Semaphore`` bounding the number of concurrent downloads.

    Returns:
        ``True`` on success, ``False`` if the URL is invalid or the request
        times out.
    """
    # Output name: last path component with the trailing '.gz' stripped.
    out_file_path = link.split("/")[-1][:-3]
    try:
        async with sem, session.get(
                'http://111.11.111.111/test/' + link) as resp:
            # wbits = MAX_WBITS | 32 lets zlib auto-detect the gzip header.
            d = zlib.decompressobj(zlib.MAX_WBITS | 32)
            with open(out_file_path, 'wb') as file:
                # iter_chunked yields plain bytes; iter_chunks (the original
                # call) yields (bytes, bool) tuples that zlib cannot handle.
                async for data in resp.content.iter_chunked(64 * 1024):
                    file.write(d.decompress(data))
                # Flush whatever is still buffered inside the decompressor —
                # omitting this silently truncates the decompressed file.
                file.write(d.flush())
        return True
    except InvalidURL:
        # Malformed link: report failure instead of silently returning None.
        return False
    except asyncio.TimeoutError:
        # asyncio.TimeoutError (a distinct class before Python 3.11) is what
        # ClientTimeout raises; the bare builtin TimeoutError missed it.
        return False
async def main():
    """Download every listed archive concurrently and return the results.

    Creates one authenticated session shared by all downloads and a
    semaphore capping parallelism at 10.

    Returns:
        List of per-link results from ``download`` (True/False flags).
    """
    links = ['test/1.csv.gz']
    # At most 10 simultaneous transfers share the single session.
    sem = asyncio.Semaphore(10)
    async with aiohttp.ClientSession(
            auth=aiohttp.BasicAuth('test', 'test'),
            timeout=timeout,
    ) as session:
        tasks = (
            download(link=link, session=session, sem=sem)
            for link in links
        )
        return await asyncio.gather(*tasks)


# Guard the entry point so importing this module does not start downloads.
if __name__ == '__main__':
    asyncio.run(main())
这段代码可以运行，但是我下载得到的每个文件都只有大约 100 MB，而服务器上这些档案的 Content-Length 都比这更大。如何修复代码才能下载到完整数据？
回答：我通过下面这种方法解决了问题：
# NOTE(review): incomplete fragment from the answer — it is lifted from inside
# a method (uses self.config) and relies on names defined elsewhere:
# downloading_queue (presumably an asyncio.Semaphore/queue — confirm),
# CLIENT_TIMEOUT, url, out_file_path, plus io.BytesIO, gzip and shutil imports.
# Unlike the streaming version above, it buffers the ENTIRE response in memory
# (resp.content.read()) before decompressing — simpler, but costly for huge files.
async with downloading_queue, aiohttp.ClientSession(
auth=aiohttp.BasicAuth(
self.config['log'],
self.config['pwd']),
timeout=CLIENT_TIMEOUT
).get(url=url) as resp:
# Read the whole gzip body into memory, then decompress it as text.
file = BytesIO(await resp.content.read())
with gzip.open(file, 'rt') as decompressed_file:
with open(out_file_path, 'w') as outfile:
# Copy decompressed text to disk in chunks.
shutil.copyfileobj(decompressed_file, outfile)
以上方法解决了文件下载不完整的问题。