Scala 如何将大流分组为子流_Scala_Fs2

Scala 如何将大流分组为子流

scala

Scala 如何将大流分组为子流,scala,fs2,Scala,Fs2,我想将大型流[F，A]分组到流[Stream[F，A]]中，内部流最多包含n元素这就是我所做的，基本上是将块导入Queue[F，Queue[F，Chunk[A]]，然后生成作为结果流的队列元素 implicit class StreamSyntax[F[_], A](s: Stream[F, A])( implicit F: Concurrent[F]) { def groupedPipe( lastQRef: Ref[F, Queue[F, Option[Ch

我想将一个大型的 Stream[F, A] 分组为 Stream[F, Stream[F, A]]，其中每个内部流最多包含 n 个元素。

这就是我所做的：基本上是把块（Chunk）送入 Queue[F, Queue[F, Chunk[A]]]，然后把队列中的元素作为结果流产出。

 /** Extension syntax that splits a `Stream[F, A]` into consecutive sub-streams of at most `n` elements. */
 implicit class StreamSyntax[F[_], A](s: Stream[F, A])(
    implicit F: Concurrent[F]) {

    /**
      * Builds a pipe that regroups its input into inner streams of at most `n` elements.
      *
      * A background stream routes the input chunks into a sequence of bounded inner queues and
      * publishes every new inner queue on an outer queue; the foreground turns each published
      * queue into one inner stream.
      *
      * @param lastQRef holds the most recently created inner queue so that it can be
      *                 terminated once the input ends
      * @param n        maximum number of elements per inner stream; must be positive
      *                 (the returned pipe rejects a non-positive `n` when it is applied)
      */
    def groupedPipe(
      lastQRef: Ref[F, Queue[F, Option[Chunk[A]]]],
      n: Int): Pipe[F, A, Stream[F, A]] = { in =>
      // A non-positive n would make `enqueueChunk` split off empty chunks forever.
      require(n > 0, "n must be > 0")

      // Outer queue plus the first inner queue, which is published immediately.
      val initQs =
        Queue.unbounded[F, Option[Queue[F, Option[Chunk[A]]]]].flatMap { qq =>
          Queue.bounded[F, Option[Chunk[A]]](1).flatMap { q =>
            lastQRef.set(q) *> qq.enqueue1(Some(q)).as(qq -> q)
          }
        }

      Stream.eval(initQs).flatMap {
        case (qq, initQ) =>
          // Creates a fresh inner queue, publishes it and records it as the latest one.
          def newQueue = Queue.bounded[F, Option[Chunk[A]]](1).flatMap { q =>
            qq.enqueue1(Some(q)) *> lastQRef.set(q).as(q)
          }

          // Routes one chunk into the inner queues. `i` is the number of elements already sent
          // to `q` and is always < n on entry. Whenever the current queue reaches n elements it
          // is closed and the remainder is routed recursively, so a chunk larger than n is split
          // across as many inner queues as needed instead of overfilling the next one.
          def enqueueChunk(
            i: Int,
            q: Queue[F, Option[Chunk[A]]],
            c: Chunk[A]): F[(Int, Queue[F, Option[Chunk[A]]])] =
            if (i + c.size >= n) {
              val (l, r) = c.splitAt(n - i)
              // Two None markers per inner queue: the first ends the inner stream's
              // `unNoneTerminate`; the second is presumably the one consumed by the drain
              // registered in `onFinalize` below.
              q.enqueue1(Some(l)) >> q.enqueue1(None) >> q.enqueue1(None) >>
                newQueue.flatMap(nq => enqueueChunk(0, nq, r))
            } else {
              q.enqueue1(Some(c)).as((i + c.size, q))
            }

          val evalStream = {
            in.chunks
              .evalMapAccumulate((0, initQ)) {
                case ((i, q), c) => enqueueChunk(i, q, c).map(next => (next, c))
              }
              // NOTE(review): `attempt` lets the termination step below run after an upstream
              // failure, but the error itself ends up as a discarded `Left` — confirm that
              // swallowing upstream errors is intended.
              .attempt ++ Stream.eval {
              lastQRef.get.flatMap { last =>
                last.enqueue1(None) *> last.enqueue1(None)
              } *> qq.enqueue1(None)
            }
          }
          qq.dequeue.unNoneTerminate
            .map(
              q =>
                q.dequeue.unNoneTerminate
                  .flatMap(Stream.chunk)
                  .onFinalize(
                    q.dequeueChunk(Int.MaxValue).unNoneTerminate.compile.drain))
            .concurrently(evalStream)
      }
    }

    /**
      * Splits `s` into consecutive inner streams of at most `n` elements each.
      *
      * @param n maximum number of elements per inner stream; must be positive
      */
    def grouped(n: Int): Stream[F, Stream[F, A]] = {
      Stream.eval {
        // Placeholder queue: the Ref needs an initial value before the pipe installs the real one.
        Queue.unbounded[F, Option[Chunk[A]]].flatMap { empty =>
          Ref.of[F, Queue[F, Option[Chunk[A]]]](empty)
        }
      }.flatMap { ref =>
        val p = groupedPipe(ref, n)
        s.through(p)
      }
    }
  }

implicit类StreamSyntax[F[\ux]，A]（s:Stream[F，A]）(
隐式F：并发[F]）{
def分组管道(
lastQRef:Ref[F，Queue[F，Option[Chunk[A]]]，
n:Int）：管道[F，A，流[F，A]]={in=>
val initQs=
Queue.unbounded[F，Option[Queue[F，Option[Chunk[A].].].flatMap{qq=>
Queue.bounded[F，Option[Chunk[A]]（1）.flatMap{q=>
lastQRef.set（q）*>qq.enqueue1（部分（q））.as（qq->q）
}
}
Stream.eval（initQs.flatMap）{
案例（qq，initQ）=>
def newQueue=Queue.bounded[F，Option[Chunk[A]]（1）.flatMap{q=>
qq.enqueue1（Some（q））*>lastQRef.set（q）.as（q）
}
val evalStream={
成批
.evalMapAccumulate（（0，initQ））{
如果i+c.size>=n=>
val（l，r）=c.splitAt（n-i）
q、 排队1（一些（l））>>q.排队1（无）>>q
.enqueue1（无）>>newQueue.flatMap{nq=>
nq.enqueue1（一些（r））.as（（r.size，nq），c））
}
如果（i+c.size）
q、 排队1（一些（c））.as（（i+c.size，q，c））
}
.trument++Stream.eval{
lastQRef.get.flatMap{last=>
last.enqueue1（无）*>last.enqueue1（无）
}*>qq.enqueue1（无）
}
}
qq.dequeue.unNoneTerminate
.地图(
q=>
q、 退出队列
.flatMap（Stream.chunk）
.完成(
q、 dequeueChunk（Int.MaxValue.unNoneTerminate.compile.drain））
.同时（蒸发流）
}
}
def分组（n:Int）={
溪流评估{
Queue.unbounded[F，Option[Chunk[A]]].flatMap{empty=>
参考[F，队列[F，选项[Chunk[A]]]]（空）
}
}.flatMap{ref=>
val p=组管道（参考，n）
s、 至（p）
}
}
}

但它非常复杂，有没有更简单的方法？

fs2有一些方法可以帮助分组

stream.chunkN(n).map(Stream.chunk)

stream.chunkLimit(n).map(Stream.chunk)

chunkN

生成大小为n的块，直到流结束

chunkLimit

分割现有块，并可以生成大小可变的块

scala> Stream(1,2,3).repeat.chunkN(2).take(5).toList
res0: List[Chunk[Int]] = List(Chunk(1, 2), Chunk(3, 1), Chunk(2, 3), Chunk(1, 2), Chunk(3, 1))

scala> (Stream(1) ++ Stream(2, 3) ++ Stream(4, 5, 6)).chunkLimit(2).toList
res0: List[Chunk[Int]] = List(Chunk(1), Chunk(2, 3), Chunk(4, 5), Chunk(6))

除了已经提到的 chunkN 之外，还可以考虑使用 groupWithin（fs2 1.0.1）：

def groupWithin[F2[x] >: F[x]](n: Int, d: FiniteDuration)(implicit timer: Timer[F2], F: Concurrent[F2]): Stream[F2, Chunk[O]]

将该流划分为在一个时间窗口内接收到的元素组，或受元素数量的限制（以先发生的为准）。如果在给定的时间窗口内无法从上游提取元素，则不会发出空组

注：每次下游拉动时都会启动一个时间窗口

我不确定您为什么希望它是嵌套流，因为要求是每一批“最多包含 n 个元素”——这意味着您只需要跟踪有限数量的元素（这正是 Chunk 的用途）。无论哪种方式，Chunk 始终可以通过 Stream.chunk 表示为 Stream：
val chunks: Stream[F, Chunk[O]] = ???
val streamOfStreams:  Stream[F, Stream[F, O]] = chunks.map(Stream.chunk)

下面是使用 groupWithin 的完整示例：
import cats.implicits._
import cats.effect.{ExitCode, IO, IOApp}
import fs2._
import scala.concurrent.duration._

/** Demo application: groups a three-element stream with `groupWithin` and prints each group on its own line. */
object GroupingDemo extends IOApp {

  override def run(args: List[String]): IO[ExitCode] = {
    val symbols = Stream('a, 'b, 'c).covary[IO]

    // A group is emitted once it holds 2 elements or the 1-second window elapses.
    val groups = symbols.groupWithin(2, 1.second).map(group => group.toList)

    val printAll = groups.showLinesStdOut.compile.drain
    printAll.as(ExitCode.Success)
  }
}

输出：
List('a, 'b)
List('c)
最后，我使用了这样一个更可靠的版本（使用 Hotswap 确保队列被终止）：
  /**
    * Splits `s` into consecutive inner streams holding at most `innerSize` elements each.
    *
    * A background stream copies the non-empty chunks of `s` into unbounded inner queues and
    * publishes every new inner queue on an outer queue; the returned stream emits one inner
    * stream per published queue.
    *
    * @param innerSize maximum number of elements per inner stream; must be positive
    * @throws IllegalArgumentException if `innerSize` is not positive
    */
  def grouped(
      innerSize: Int
    )(implicit F: Async[F]): Stream[F, Stream[F, A]] = {
      // A non-positive size would make `loopChunk` split off empty chunks forever,
      // allocating a new inner queue on every iteration.
      require(innerSize > 0, "innerSize must be > 0")

      type InnerQueue = Queue[F, Option[Chunk[A]]]
      type OuterQueue = Queue[F, Option[InnerQueue]]

      // Swaps a fresh inner queue into the Hotswap and publishes it on the outer queue.
      // The queue is wrapped in a Resource whose finalizer offers `None`; per the Hotswap
      // contract, swapping releases the previous resource, which is what terminates the
      // previous inner stream.
      def swapperInner(swapper: Hotswap[F, InnerQueue], outer: OuterQueue) = {
        val innerRes =
          Resource.make(Queue.unbounded[F, Option[Chunk[A]]])(_.offer(None))
        swapper.swap(innerRes).flatTap(q => outer.offer(q.some))
      }

      // Routes one chunk into the inner queues and returns the updated
      // (elements gathered in the current queue, current queue) state.
      def loopChunk(
        gathered: Int,
        curr: Queue[F, Option[Chunk[A]]],
        chunk: Chunk[A],
        newInnerQueue: F[InnerQueue]
      ): F[(Int, Queue[F, Option[Chunk[A]]])] = {
        if (gathered + chunk.size > innerSize) {
          // Chunk overflows the current queue: fill it up, then continue with the remainder
          // on a fresh queue (recursing, since the remainder may overflow again).
          val (left, right) = chunk.splitAt(innerSize - gathered)
          curr.offer(left.some) >> newInnerQueue.flatMap { nq =>
            loopChunk(0, nq, right, newInnerQueue)
          }
        } else if (gathered + chunk.size == innerSize) {
          // Chunk fills the current queue exactly: start the next queue right away.
          curr.offer(chunk.some) >> newInnerQueue.tupleLeft(
            0
          )
        } else {
          curr.offer(chunk.some).as(gathered + chunk.size -> curr)
        }
      }

      val prepare = for {
        outer   <- Resource.eval(Queue.unbounded[F, Option[InnerQueue]])
        swapper <- Hotswap.create[F, InnerQueue]
      } yield outer -> swapper

      Stream.resource(prepare).flatMap {
        case (outer, swapper) =>
          val newInner = swapperInner(swapper, outer)
          val background = Stream.eval(newInner).flatMap { initQueue =>
            s.chunks
              .filter(_.nonEmpty)
              .evalMapAccumulate(0 -> initQueue) { (state, chunk) =>
                val (gathered, curr) = state
                // The accumulated output is irrelevant; only the state is threaded through.
                loopChunk(gathered, curr, chunk, newInner).tupleRight(())
              }
              // When the input ends: release the last inner queue, then end the outer stream.
              .onFinalize(swapper.clear *> outer.offer(None))
          }
          val foreground = Stream
            .fromQueueNoneTerminated(outer)
            .map(i => Stream.fromQueueNoneTerminatedChunk(i))
          foreground.concurrently(background)
      }

    }

def分组(
内部尺寸：Int
)（隐式F:Async[F]）：流[F，流[F，A]={
输入InnerQueue=Queue[F，Option[Chunk[A]]
键入OuterQueue=Queue[F，选项[InnerQueue]]
def SWAPPERINER（交换程序：热交换[F，InnerQueue]，外部：OuterQueue）={
内谷=
Resource.make（Queue.unbounded[F，Option[Chunk[A]]）（u.offer（None））
swapper.swap（innerRes.flatTap（q=>outer.offer（q.some））
}
def环块(
集合：Int，
当前：队列[F，选项[Chunk[A]]，
chunk:chunk[A]，
newInnerQueue:F[InnerQueue]
)：F[（Int，Queue[F，Option[Chunk[A]]）]={
if（聚集+chunk.size>innerSize）{
val（左，右）=chunk.splitAt（内部大小-聚集）
curr.offer（left.some）>>newInnerQueue.flatMap{nq=>
loopChunk（0，nq，右，newInnerQueue）
}
}else if（collected+chunk.size==innerSize）{
当前报价（chunk.some）>>newInnerQueue.tupleLeft(
0
)
}否则{
当前报价（chunk.some）.as（collected+chunk.size->curr）
}
}
准备{
外面的
val newInner=swapperInner（swapper，外部）
val background=Stream.eval（newInner）.flatMap{initQueue=>
s、 大块
.filter（u.nonEmpty）
.evalMapAccumulate（0->initQueue）{（状态，块）=>
val（已收集，当前）=状态
loopChunk（collected、curr、chunk、newInner）.tupleRight（{}）
}
.onFinalize（swapper.clear*>outer.offer（无））
}
val前台=流
.fromQueueNoneTerminated（外部）
.map（i=>Stream.fromQueueNoneTerminatedChunk（i））
前台。并发（后台）
}
}
是的，但是它把数据分组成 Chunk，而不是 Stream
是的，它生成的是 Chunk。幸运的是，如果您需要的话，可以用 Stream.chunk 方法很容易地从 Chunk 创建 Stream。可能我一开始误解了您的意思：我的建议是生成严格（strict）的块流，而您需要的是惰性（lazy）的块流。