Python 如何异步映射/过滤异步iterable?

Python 如何异步映射/过滤异步iterable?,python,async-await,python-asyncio,Python,Async Await,Python Asyncio,假设我有一个异步iterable,我可以使用async for传递它,那么我如何将它映射并过滤到一个新的异步迭代器?下面的代码是对我如何使用同步iterable执行相同操作的改编,它不起作用,因为yield不允许在async defs中使用 async def mapfilter(aiterable, p, func): async for payload in aiterable: if p(payload): # This part isn'

假设我有一个异步iterable,我可以使用
async for
传递它,那么我如何将它映射并过滤到一个新的异步迭代器?下面的代码是对我如何使用同步iterable执行相同操作的改编,它不起作用,因为
yield
不允许在
async def
s中使用

# Illustrative code from the question: filter an async iterable with `p`,
# then map the survivors through `func`.
# NOTE(review): `yield` inside `async def` was a syntax error before
# Python 3.6; since PEP 525 (native async generators) this works as written.
async def mapfilter(aiterable, p, func):
    async for payload in aiterable:
        if p(payload):

            # This part isn't allowed (on pre-3.6 Python), but hopefully it
            # should be clear what I'm trying to accomplish.
            yield func(payload)
不能在协同程序中使用 yield（Python 3.6 之前）。要实现你的想法，我认为唯一的办法就是自己实现异步迭代器协议（__aiter__/__anext__）。如果我是对的，像这样:

class MapFilter:
    """Async iterator that filters, then maps, another async iterator.

    Yields ``func(payload)`` for every ``payload`` produced by ``aiterable``
    for which ``p(payload)`` is truthy.
    """

    def __init__(self, aiterable, p, func):
        self.aiterable = aiterable  # source async iterator (must implement __anext__)
        self.p = p                  # sync predicate: keep payload?
        self.func = func            # sync mapping applied to kept payloads

    def __aiter__(self):
        # BUGFIX: __aiter__ must be a plain method that returns the async
        # iterator itself.  `async def __aiter__` (returning an awaitable)
        # was deprecated in 3.5.2 and is rejected by `async for` since
        # Python 3.7 with a TypeError.
        return self

    async def __anext__(self):
        # Keep pulling until a payload passes the predicate; the source's
        # StopAsyncIteration propagates and ends iteration naturally.
        while True:
            payload = await self.aiterable.__anext__()  # StopAsyncIteration raised here on exhaustion
            if self.p(payload):
                return self.func(payload)
让我们测试一下。下面是helper
arange
class的完整示例(我从中获取):

值得一提的是，PEP 525 提议允许异步生成器使用您提出的相同语法（该提案已在 Python 3.6 中实现）

同时,如果您不想处理异步迭代器样板文件,还可以使用其注释中提到的
asyncio_extras

发件人:

还有一种支持在 async 构造中使用 yield 的方法。

Python 3.6+的异步
map
filter
实现，专门设计为不按输入顺序返回子任务的结果，而是以先完成者为准先返回

import asyncio
import time

from collections import deque
from typing import Any, Callable, Collection, AsyncIterator, Iterator, Union


async def _next(gg):
    # repackaging non-asyncio next() as async-like anext()
    try:
        return next(gg)
    except StopIteration:
        raise StopAsyncIteration


async def _aionext(gg):
    # there is no anext() :(
    return await gg.__anext__()


async def map_unordered(fn: Callable, args: Union[Iterator, Collection, AsyncIterator], maxsize=None):
    """
    Async generator yielding return values of resolved invocations
    of `fn` against each arg in `args`.

    Arguments are consumed and fed to the callable in the order they are
    presented in `args`.  Results are yielded NOT in order of `args`:
    whichever task finishes earliest is yielded first.

    If `maxsize` is specified, the worker task pool is constrained to that
    size.  (Docstring previously said `size`, which is not a parameter.)

    This is the asyncio equivalent of Gevent's
    `imap_unordered(fn, args_iterable, pool_size)`
    http://www.gevent.org/api/gevent.pool.html#gevent.pool.Group.imap_unordered

    `args` may be an Async Iterator or a regular Iterator.
    Thus, you can chain `map_unordered` as `args` for another `map_unordered`.

    Because this is an async generator, it cannot be consumed as a regular
    iterable.  Must use `async for`.

    Usage example:

            # note NO await in this assignment
            gen = map_unordered(fn, arguments_iter, maxsize=3)
            async for returned_value in gen:
                yield returned_value

    :raises ValueError: if ``maxsize == 0``; use ``None`` for "no limit".
    """
    if maxsize == 0:
        raise ValueError(
            'Argument `maxsize` cannot be set to zero. '
            'Use `None` to indicate no limit.'
        )

    async def _next_sync(it):
        # Adapt next() of a sync iterator to the async protocol.
        try:
            return next(it)
        except StopIteration:
            raise StopAsyncIteration

    async def _next_async(ait):
        # anext() equivalent for async iterators (no builtin pre-3.10).
        return await ait.__anext__()

    # Pick the appropriate "advance" function for whatever `args` is,
    # making a plain collection consumable like a generator.
    if hasattr(args, '__anext__'):
        n = _next_async
    elif hasattr(args, '__next__'):
        n = _next_sync
    else:
        args = iter(args)
        n = _next_sync

    have_args = True  # assumed; don't len(args) — it may be a generator.
    pending_tasks = deque()

    while have_args or len(pending_tasks):
        try:
            # Top up the worker pool until it reaches maxsize
            # (maxsize=None never matches, i.e. no limit).
            while len(pending_tasks) != maxsize:
                arg = await n(args)
                pending_tasks.append(
                    # ensure_future is the supported way to schedule a
                    # coroutine; instantiating asyncio.Task directly is
                    # discouraged/deprecated.
                    asyncio.ensure_future(fn(arg))
                )
        except StopAsyncIteration:
            have_args = False

        if not len(pending_tasks):
            return

        done, pending_tasks = await asyncio.wait(
            pending_tasks, return_when=asyncio.FIRST_COMPLETED
        )
        pending_tasks = deque(pending_tasks)

        for task in done:
            yield await task  # await converts the task into its return value


async def _filter_wrapper(fn, arg):
    return (await fn(arg)), arg

async def _filter_none(arg):
    return not (arg is None)

async def filter_unordered(fn: Union[Callable, None], args: Union[Iterator, Collection, AsyncIterator], maxsize=None):
    """
    Async filter generator yielding values of the `args` collection that
    match the filter condition.  Like Python's native
    `filter([Callable|None], iterable)` but:
    - allows the iterable to be an async iterator
    - allows the callable to be an async callable
    - returns results OUT OF ORDER — whichever passes the filter test first.

    Arguments are consumed and fed to the callable in the order they are
    presented in `args`.  Results are yielded NOT in order of `args`:
    the earliest-done value passing the filter condition is yielded first.

    If `maxsize` is specified, the worker task pool is constrained to that size.

    This is inspired by Gevent's `imap_unordered(fn, args_iterable, pool_size)`
    http://www.gevent.org/api/gevent.pool.html#gevent.pool.Group.imap_unordered

    Because this is an async generator, it cannot be consumed as a regular
    iterable.  Must use `async for`.

    Usage example:

            # note NO await in this assignment
            gen = filter_unordered(fn, arguments_iter, maxsize=3)
            async for returned_value in gen:
                yield returned_value

    :raises ValueError: if ``maxsize == 0``; use ``None`` for "no limit".
    """
    if maxsize == 0:
        raise ValueError(
            'Argument `maxsize` cannot be set to zero. '
            'Use `None` to indicate no limit.'
        )

    async def _next_sync(it):
        # Adapt next() of a sync iterator to the async protocol.
        try:
            return next(it)
        except StopIteration:
            raise StopAsyncIteration

    async def _next_async(ait):
        # anext() equivalent for async iterators (no builtin pre-3.10).
        return await ait.__anext__()

    if hasattr(args, '__anext__'):
        n = _next_async
    elif hasattr(args, '__next__'):
        n = _next_sync
    else:
        args = iter(args)
        n = _next_sync

    if fn is None:
        async def fn(arg):
            # Default predicate: keep values that are not None.
            return arg is not None

    async def _wrap(arg):
        # Pair the predicate verdict with the arg so the value survives
        # out-of-order completion.
        return (await fn(arg)), arg

    have_args = True  # assumed; don't len(args) — it may be a generator.
    pending_tasks = deque()

    while have_args or len(pending_tasks):
        try:
            # Top up the worker pool until it reaches maxsize
            # (maxsize=None never matches, i.e. no limit).
            while len(pending_tasks) != maxsize:
                arg = await n(args)
                pending_tasks.append(
                    # ensure_future is the supported way to schedule a
                    # coroutine; instantiating asyncio.Task directly is
                    # discouraged/deprecated.
                    asyncio.ensure_future(_wrap(arg))
                )
        except StopAsyncIteration:
            have_args = False

        if not len(pending_tasks):
            return

        done, pending_tasks = await asyncio.wait(
            pending_tasks, return_when=asyncio.FIRST_COMPLETED
        )
        pending_tasks = deque(pending_tasks)

        for task in done:
            filter_match, arg = await task
            if filter_match:
                yield arg
与Gevent的
imap_unordered
类似，但与Gevent的版本不同，它还允许args iterable作为异步值生成器。这意味着你可以把这些调用链式组合起来。

鉴于:

async def worker(seconds):
    """Demo coroutine: sleep for `seconds`, logging start/end, and echo it back."""
    delay = seconds
    print('> Start wait', delay)
    await asyncio.sleep(delay)
    print('< End wait', delay)
    return delay


async def to_aio_gen(ll):
    """Lift a plain iterable into an async generator yielding the same items."""
    for item in ll:
        yield item

async def test_map(ll, size=None):
    """Drive map_unordered(worker, ll), printing each result with its elapsed time."""
    started = time.time()
    gen = map_unordered(worker, ll, maxsize=size)
    async for value in gen:
        elapsed = round(time.time() - started, 1)
        print('-- elapsed second', elapsed, ' received value', value)


# Per-task sleep durations in seconds, fed to `worker` by the demo runs below.
ll = [
    0.2,
    0.4,
    0.8,
    1.2,
    1.1,
    0.3,
    0.6,
    0.9,
]

你看过吗?@jornsharpe这个库不是关于asyncio的,它是关于线程的。尝试实现一种产生内部异步函数的方法:是的,我想我希望能有一些语法糖来完成所有这些。接受你的答案,因为似乎没有。asyncio_extras为它提供了一些语法糖分:这不应该是库的一部分吗?异步库似乎是半生不熟的,缺少一些相当常见的构造。
# Example using third-party helpers (`async_generator` / `yield_async`,
# e.g. from asyncio_extras) that emulate async generators on Pythons
# lacking native `yield` in `async def`.
@async_generator
async def mygenerator(websites):
    for website in websites:
        # `http_fetch` is presumably an async HTTP helper defined
        # elsewhere — not shown in this snippet.
        page = await http_fetch(website)
        # `await yield_async(...)` stands in for a native `yield`.
        await yield_async(page)

async def fetch_pages():
    # Consume the emulated async generator exactly like a native one.
    websites = ('http://foo.bar', 'http://example.org')
    async for sanitized_page in mygenerator(websites):
        print(sanitized_page)
from collections import deque
from typing import Any, Callable, Collection, AsyncIterator, Iterator, Union


async def _next(gg):
    # repackaging non-asyncio next() as async-like anext()
    try:
        return next(gg)
    except StopIteration:
        raise StopAsyncIteration


async def _aionext(gg):
    # there is no anext() :(
    return await gg.__anext__()


async def map_unordered(fn: Callable, args: Union[Iterator, Collection, AsyncIterator], maxsize=None):
    """
    Async generator yielding return values of resolved invocations
    of `fn` against each arg in `args`.

    Arguments are consumed and fed to the callable in the order they are
    presented in `args`.  Results are yielded NOT in order of `args`:
    whichever task finishes earliest is yielded first.

    If `maxsize` is specified, the worker task pool is constrained to that
    size.  (Docstring previously said `size`, which is not a parameter.)

    This is the asyncio equivalent of Gevent's
    `imap_unordered(fn, args_iterable, pool_size)`
    http://www.gevent.org/api/gevent.pool.html#gevent.pool.Group.imap_unordered

    `args` may be an Async Iterator or a regular Iterator.
    Thus, you can chain `map_unordered` as `args` for another `map_unordered`.

    Because this is an async generator, it cannot be consumed as a regular
    iterable.  Must use `async for`.

    Usage example:

            # note NO await in this assignment
            gen = map_unordered(fn, arguments_iter, maxsize=3)
            async for returned_value in gen:
                yield returned_value

    :raises ValueError: if ``maxsize == 0``; use ``None`` for "no limit".
    """
    if maxsize == 0:
        raise ValueError(
            'Argument `maxsize` cannot be set to zero. '
            'Use `None` to indicate no limit.'
        )

    async def _next_sync(it):
        # Adapt next() of a sync iterator to the async protocol.
        try:
            return next(it)
        except StopIteration:
            raise StopAsyncIteration

    async def _next_async(ait):
        # anext() equivalent for async iterators (no builtin pre-3.10).
        return await ait.__anext__()

    # Pick the appropriate "advance" function for whatever `args` is,
    # making a plain collection consumable like a generator.
    if hasattr(args, '__anext__'):
        n = _next_async
    elif hasattr(args, '__next__'):
        n = _next_sync
    else:
        args = iter(args)
        n = _next_sync

    have_args = True  # assumed; don't len(args) — it may be a generator.
    pending_tasks = deque()

    while have_args or len(pending_tasks):
        try:
            # Top up the worker pool until it reaches maxsize
            # (maxsize=None never matches, i.e. no limit).
            while len(pending_tasks) != maxsize:
                arg = await n(args)
                pending_tasks.append(
                    # ensure_future is the supported way to schedule a
                    # coroutine; instantiating asyncio.Task directly is
                    # discouraged/deprecated.
                    asyncio.ensure_future(fn(arg))
                )
        except StopAsyncIteration:
            have_args = False

        if not len(pending_tasks):
            return

        done, pending_tasks = await asyncio.wait(
            pending_tasks, return_when=asyncio.FIRST_COMPLETED
        )
        pending_tasks = deque(pending_tasks)

        for task in done:
            yield await task  # await converts the task into its return value


async def _filter_wrapper(fn, arg):
    return (await fn(arg)), arg

async def _filter_none(arg):
    return not (arg is None)

async def filter_unordered(fn: Union[Callable, None], args: Union[Iterator, Collection, AsyncIterator], maxsize=None):
    """
    Async filter generator yielding values of the `args` collection that
    match the filter condition.  Like Python's native
    `filter([Callable|None], iterable)` but:
    - allows the iterable to be an async iterator
    - allows the callable to be an async callable
    - returns results OUT OF ORDER — whichever passes the filter test first.

    Arguments are consumed and fed to the callable in the order they are
    presented in `args`.  Results are yielded NOT in order of `args`:
    the earliest-done value passing the filter condition is yielded first.

    If `maxsize` is specified, the worker task pool is constrained to that size.

    This is inspired by Gevent's `imap_unordered(fn, args_iterable, pool_size)`
    http://www.gevent.org/api/gevent.pool.html#gevent.pool.Group.imap_unordered

    Because this is an async generator, it cannot be consumed as a regular
    iterable.  Must use `async for`.

    Usage example:

            # note NO await in this assignment
            gen = filter_unordered(fn, arguments_iter, maxsize=3)
            async for returned_value in gen:
                yield returned_value

    :raises ValueError: if ``maxsize == 0``; use ``None`` for "no limit".
    """
    if maxsize == 0:
        raise ValueError(
            'Argument `maxsize` cannot be set to zero. '
            'Use `None` to indicate no limit.'
        )

    async def _next_sync(it):
        # Adapt next() of a sync iterator to the async protocol.
        try:
            return next(it)
        except StopIteration:
            raise StopAsyncIteration

    async def _next_async(ait):
        # anext() equivalent for async iterators (no builtin pre-3.10).
        return await ait.__anext__()

    if hasattr(args, '__anext__'):
        n = _next_async
    elif hasattr(args, '__next__'):
        n = _next_sync
    else:
        args = iter(args)
        n = _next_sync

    if fn is None:
        async def fn(arg):
            # Default predicate: keep values that are not None.
            return arg is not None

    async def _wrap(arg):
        # Pair the predicate verdict with the arg so the value survives
        # out-of-order completion.
        return (await fn(arg)), arg

    have_args = True  # assumed; don't len(args) — it may be a generator.
    pending_tasks = deque()

    while have_args or len(pending_tasks):
        try:
            # Top up the worker pool until it reaches maxsize
            # (maxsize=None never matches, i.e. no limit).
            while len(pending_tasks) != maxsize:
                arg = await n(args)
                pending_tasks.append(
                    # ensure_future is the supported way to schedule a
                    # coroutine; instantiating asyncio.Task directly is
                    # discouraged/deprecated.
                    asyncio.ensure_future(_wrap(arg))
                )
        except StopAsyncIteration:
            have_args = False

        if not len(pending_tasks):
            return

        done, pending_tasks = await asyncio.wait(
            pending_tasks, return_when=asyncio.FIRST_COMPLETED
        )
        pending_tasks = deque(pending_tasks)

        for task in done:
            filter_match, arg = await task
            if filter_match:
                yield arg
async def worker(seconds):
    """Demo coroutine: sleep for `seconds`, logging start/end, and echo it back."""
    delay = seconds
    print('> Start wait', delay)
    await asyncio.sleep(delay)
    print('< End wait', delay)
    return delay


async def to_aio_gen(ll):
    """Lift a plain iterable into an async generator yielding the same items."""
    for item in ll:
        yield item

async def test_map(ll, size=None):
    """Drive map_unordered(worker, ll), printing each result with its elapsed time."""
    started = time.time()
    gen = map_unordered(worker, ll, maxsize=size)
    async for value in gen:
        elapsed = round(time.time() - started, 1)
        print('-- elapsed second', elapsed, ' received value', value)


# Per-task sleep durations in seconds, fed to `worker` by the demo runs below.
ll = [
    0.2,
    0.4,
    0.8,
    1.2,
    1.1,
    0.3,
    0.6,
    0.9,
]
>>> asyncio.run(test_map(ll, 3))
> Start wait 0.2
> Start wait 0.4
> Start wait 0.8
< End wait 0.2
-- elapsed second 0.2  received value 0.2
> Start wait 1.2
< End wait 0.4
-- elapsed second 0.4  received value 0.4
> Start wait 1.1
< End wait 0.8
-- elapsed second 0.8  received value 0.8
> Start wait 0.3
< End wait 0.3
-- elapsed second 1.1  received value 0.3
> Start wait 0.6
< End wait 1.2
-- elapsed second 1.4  received value 1.2
> Start wait 0.9
< End wait 1.1
-- elapsed second 1.5  received value 1.1
< End wait 0.6
-- elapsed second 1.7  received value 0.6
< End wait 0.9
-- elapsed second 2.3  received value 0.9
async def more_than_half(v):
    """Async predicate: sleep `v` seconds, then report whether v exceeds 0.5."""
    await asyncio.sleep(v)
    return 0.5 < v

>>> async def test_filter(ll, size=None):
...     t = time.time()
...     async for v in filter_unordered(more_than_half, to_aio_gen(ll), maxsize=size):
...         print('-- elapsed second', round(time.time() - t, 1), ' received value', v)
...
>>> asyncio.run(test_filter(ll, 3))
-- elapsed second 0.8  received value 0.8
-- elapsed second 1.4  received value 1.2
-- elapsed second 1.5  received value 1.1
-- elapsed second 1.7  received value 0.6
-- elapsed second 2.3  received value 0.9