在RxJava中连接两个大型数据集

在RxJava中连接两个大型数据集,java,rx-java,reactivex,Java,Rx Java,Reactivex,我使用RxJava处理两个大数据集(数百万条记录),它们需要由一个ID连接起来。这两个数据集不一定包含相同的记录。但它们是按ID排序的 我发现join方法可以用于此,下面的实验执行“完全连接”,并根据匹配的记录进行过滤 public class BatchTest { public static void main (String[] args) { Observable<Integer> myLeft = Observable.ju

我使用RxJava处理两个大数据集(数百万条记录),它们需要由一个ID连接起来。这两个数据集不一定包含相同的记录。但它们是按ID排序的

我发现
join
方法可以用于此,下面的实验执行“完全连接”,并根据匹配的记录进行过滤

  /**
   * Small experiment that joins two integer streams with {@code Observable.join}
   * and keeps only the pairs whose two values are equal.
   * <p>
   * Both duration selectors return {@code Observable.never ()}, so no join window
   * ever closes and every left value is paired with every right value before the
   * filter is applied.
   */
  public class BatchTest
  {
     public static void main (String[] args)
     {
        final Observable<Integer> myLeft    = Observable.just (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        final Observable<Integer> myRight   = Observable.just (1, 3, 5, 7, 9);

        // Duration selector shared by both sides: the window of a value never closes.
        final Func1<Integer, Observable<Integer>> myKeepOpen =
           new Func1<Integer, Observable<Integer>>()
           {
              public Observable<Integer> call (Integer aValue)
              {
                 return Observable.never ();
              }
           };

        // Packs one left/right combination into a two element array.
        final Func2<Integer, Integer, Integer[]> myToPair =
           new Func2<Integer, Integer, Integer[]>()
           {
              public Integer[] call (Integer aLeftValue, Integer aRightValue)
              {
                 final Integer[] myPair = new Integer[2];

                 myPair[0] = aLeftValue;
                 myPair[1] = aRightValue;

                 return myPair;
              }
           };

        // Accepts only the combinations where both sides carry the same value.
        final Func1<Integer[], Boolean> myIsMatch =
           new Func1<Integer[], Boolean> ()
           {
              public Boolean call (Integer[] aPair)
              {
                 return aPair[0].equals (aPair[1]);
              }
           };

        // Prints one matching pair per line.
        final Action1<Integer[]> myPrinter =
           new Action1<Integer[]> ()
           {
              public void call (Integer[] aPair)
              {
                 System.out.printf ("%d, %d\n", aPair[0], aPair[1]);
              }
           };

        final Observable<Integer[]> myCombined =
           myLeft.join (myRight, myKeepOpen, myKeepOpen, myToPair);

        myCombined
           .filter (myIsMatch)
           .subscribe (myPrinter);
     }
  }
公共类批处理测试
{
公共静态void main(字符串[]args)
{
Observable myLeft=Observable.just(1,2,3,4,5,6,7,8,9,10);
可见光=可见光(1,3,5,7,9);
myLeft.join(
没错,
新功能1()
{
公共可观测呼叫(整数aT)
{
return-Observable.never();
}
},
新功能1()
{
公共可观测呼叫(整数aT)
{
return-Observable.never();
}
},
新功能2()
{
公共整数[]调用(整数aT1,整数aT2)
{
返回新的整数[]{aT1,aT2};
}
})
.filter(新函数1()
{
公共布尔调用(整数[]在)
{
在[0]返回。等于(在[1]);
}
})
.订阅(新操作1()
{
公共无效调用(整数[]在)
{
System.out.printf(“%d,%d\n”,位于[0],位于[1]);
}
});
}
}
这对于小规模的示例数据来说效果很好,但对于大型数据集来说效率很低

所以我的问题是:既然这两个集合都是按键排序的,有没有办法利用这些选择器/窗口函数来限制连接的范围,这样我就不必把300万条记录与另外300万条记录逐一进行连接?


或者我这样做是错误的吗?

所以,我最终的做法基本上是实现一个自定义的 `Operator`(操作符),它接收第二个 `Observable`,并在一个新线程上订阅它。自定义的订阅者读取数据并将其放入一个 `BlockingQueue`(阻塞队列),之后再从该队列中取出数据,与原始 `Observable` 的数据进行合并

如果有人遇到相同的情况,请参见:

import java.util.Comparator;
import java.util.Objects;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

import rx.Observable;
import rx.Scheduler;
import rx.Subscriber;
import rx.functions.Action1;
import rx.functions.Func2;

/**
 * This class is an operator which can be used to join two {@link Observable} streams,
 * by matching them up using a {@link Comparator}. The two streams need to be sorted
 * according to the rules of the {@link Comparator} for this to work.
 * <p>
 * If the main stream is empty this might never get invoked even if the right stream
 * has data.
 */
public class JoinByComparisonOperator<I, R> implements Observable.Operator<R, I>
{

   private final RightSubscriber<I> subscriberRight;

   private final Comparator<I> comparator;

   private final Func2<I, I, Observable<R>> resultSelector;

  /**
   * The constructor for this class.
   * <p>
   * Note that the "right" Observable is subscribed to immediately, inside this
   * constructor, and its values are buffered until the main stream consumes them.
   * <p>
   * @param aRight
   *     The observable that is joined to the "right"
   * @param aScheduler
   *     The scheduler used to run the "right" Observable as it always needs to
   *     run on a new thread.
   * @param aComparator
   *     The comparator used to compare two input values. This should follow the
   *     same rules by which the two input streams are sorted
   * @param aResultSelector
   *     Function that gets two matching results and can handle them accordingly.
   *     Note the inputs can be null in case there was no match.
   */
   public JoinByComparisonOperator(
      final Observable<I>              aRight,
      final Scheduler                  aScheduler,
      final Comparator<I>              aComparator,
      final Func2<I, I, Observable<R>> aResultSelector
   )
   {
      subscriberRight   = new RightSubscriber<> ();
      comparator        = aComparator;
      resultSelector    = aResultSelector;

      aRight
         .subscribeOn (aScheduler)
         .subscribe (subscriberRight);
   }

   /**
    * Creates a new subscriber that gets called and passes on any calls in turn.
    *
    * @param aSubscriber
    *     The downstream subscriber that receives the joined results.
    * @return
    *     The subscriber that consumes the main ("left") stream.
    * <p>
    * @see rx.functions.Func1#call(java.lang.Object)
    */
   @Override
   public Subscriber<? super I> call (final Subscriber<? super R> aSubscriber)
   {
      return new LeftSubscriber (aSubscriber);
   }


   /**
    * The subscriber for the "left" stream, which is the main stream we are operating
    * on. For every left value it advances through the buffered right values:
    * right values that sort before the left value are emitted as (null, right),
    * an equal right value is emitted together with the left value, and a right
    * value that sorts after the left value is kept for the following left values.
    */
   private class LeftSubscriber extends Subscriber<I>
   {

      final Subscriber<? super R> nextSubscriber;

      /** Look-ahead: a right value already taken from the buffer but not yet emitted. */
      private I nextRight;

      /** Set once a terminal event has been forwarded, so that it happens only once. */
      private boolean terminated;

      public LeftSubscriber (final Subscriber<? super R> aNextSubscriber)
      {
         nextSubscriber = aNextSubscriber;
      }

      /**
       * Runs the result selector for one pair and forwards everything it emits.
       * Either argument may be null when that side had no matching value.
       */
      private void selectResultInternal (I aLeft, I aRight)
      {
         resultSelector.call (aLeft, aRight).subscribe (new Action1<R>()
         {
            public void call (R aInput)
            {
               nextSubscriber.onNext (aInput);
            }
         });
      }

      /** Tells whether nothing more may be sent downstream. */
      private boolean isDone ()
      {
         return terminated || nextSubscriber.isUnsubscribed ();
      }

      /**
       * Forwards a failure of the right stream downstream, if there is one.
       *
       * @return true if the right stream failed and the join has been terminated.
       */
      private boolean failOnRightError ()
      {
         final Throwable myError = subscriberRight.getError ();

         if (myError == null)
         {
            return false;
         }

         terminated = true;
         nextSubscriber.onError (myError);
         unsubscribe ();

         return true;
      }

      /** Restores the interrupt flag and terminates the join with the given exception. */
      private void failOnInterrupt (InterruptedException aException)
      {
         Thread.currentThread ().interrupt ();
         onError (aException);
      }

      /**
       * The left stream is finished: emits all remaining right values as
       * (null, right) and then completes the downstream subscriber.
       */
      @Override
      public void onCompleted ()
      {
         if (isDone ())
         {
            return;
         }

         while (nextRight != null || !subscriberRight.isComplete ())
         {
            // A failed right stream never reports completion, so without this
            // check the loop would never end.
            if (isDone () || failOnRightError ())
            {
               return;
            }

            I myNext = nextRight;
            nextRight = null;

            if (myNext == null)
            {
               try
               {
                  myNext = subscriberRight.takeNext ();
               }
               catch (InterruptedException myException)
               {
                  failOnInterrupt (myException);
                  return;
               }
            }

            // A null here means the wait timed out; the loop condition decides
            // whether to wait again.
            if (myNext != null)
            {
               selectResultInternal (null, myNext);
            }
         }

         terminated = true;
         nextSubscriber.onCompleted ();
      }

      /**
       * Forwards the failure downstream (as an error, not as a completion) and
       * stops consuming the right stream.
       */
      @Override
      public void onError (Throwable aE)
      {
         if (!isDone ())
         {
            terminated = true;
            nextSubscriber.onError (aE);
         }

         subscriberRight.unsubscribe ();
      }

      /**
       * Joins one left value with the right stream and emits the result.
       */
      @Override
      public void onNext (I aInput)
      {
         if (isDone () || failOnRightError ())
         {
            return;
         }

         I myRight = null;

         // The look-ahead value has to be compared even after the right stream has
         // been drained completely, otherwise a pending match would be lost.
         while (nextRight != null || !subscriberRight.isComplete ())
         {
            if (nextRight == null)
            {
               try
               {
                  nextRight = subscriberRight.takeNext ();
               }
               catch (InterruptedException myException)
               {
                  failOnInterrupt (myException);
                  return;
               }

               if (nextRight == null)
               {
                  // Timed out: the right stream is slow, finished or failed. Do not
                  // treat this as "no match"; re-check the state and wait again.
                  if (nextSubscriber.isUnsubscribed () || failOnRightError ())
                  {
                     return;
                  }

                  continue;
               }
            }

            final int myComparison = Objects.compare (nextRight, aInput, comparator);

            if (myComparison > 0)
            {
               // The right side is ahead; keep the value for a later left value.
               break;
            }

            if (myComparison == 0)
            {
               myRight     = nextRight;
               nextRight   = null;
               break;
            }

            // The right value has no partner on the left side.
            selectResultInternal (null, nextRight);
            nextRight   = null;
         }

         selectResultInternal (aInput, myRight);
      }
   }

   /**
    * This class is intended to consume the "right" input stream and buffer the result
    * so it can be retrieved when processing the main stream. It is written to by the
    * thread running the right stream and read by the thread running the main stream.
    */
   private static class RightSubscriber<T> extends Subscriber<T>
   {

      /** Maximum number of right values held before the producer blocks. */
      private static final int BUFFER_CAPACITY = 1000;

      /** How long a single {@link #takeNext()} call waits for a value. */
      private static final long POLL_TIMEOUT_MILLIS = 100;

      // volatile: both flags are set by the producer and read by the consumer thread
      private volatile boolean complete = false;

      private volatile Throwable error = null;

      private final BlockingQueue<T> buffer = new ArrayBlockingQueue <> (BUFFER_CAPACITY);

      @Override
      public void onCompleted ()
      {
         complete = true;
      }

      @Override
      public void onError (Throwable aE)
      {
         error = aE;
      }

      /** Buffers the value, blocking while the buffer is full. */
      @Override
      public void onNext (T aT)
      {
         try {
            buffer.put (aT);
         }
         catch (InterruptedException myException) {
            // The value could not be buffered, so the join can no longer be correct.
            Thread.currentThread ().interrupt ();
            error = myException;
            unsubscribe ();
         }
      }

      /**
       * Takes the next buffered value, waiting a limited time for one to arrive.
       *
       * @return the next value, or null if none became available in time.
       * @throws InterruptedException if the waiting thread is interrupted.
       */
      public T takeNext() throws InterruptedException
      {
         return buffer.poll (POLL_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
      }

      /**
       * @return true once the right stream has completed and all of its values
       *     have been taken from the buffer.
       */
      public boolean isComplete()
      {
         // "complete" is read first: it is set after the last value was buffered.
         return complete && buffer.isEmpty ();
      }

      /**
       * @return the failure of the right stream, or null if it has not failed.
       */
      public Throwable getError()
      {
         return error;
      }
   }
}
import java.util.Comparator;
导入java.util.Objects;
导入java.util.concurrent.ArrayBlockingQueue;
导入java.util.concurrent.BlockingQueue;
导入java.util.concurrent.TimeUnit;
进口接收。可观察;
导入rx.Scheduler;
输入接收用户;
导入rx.functions.Action1;
导入rx.functions.Func2;
/**
*此类是一个运算符,可用于连接两个{@link Observable}流,
*通过使用{@link Comparator}匹配它们。这两条流需要分类
*根据{@link Comparator}的规则,这是可行的。
*
*如果主流为空,那么即使右侧的流中
*有数据,该运算符也可能永远不会被调用。
*/
公共类JoinByComparisonOperator实现Observable.Operator
{
私人最终权利认购人认购权;
私人最终比较人;
专用最终Func2结果选择器;
/**
*该类的构造函数。
*
*@param-aRight
*连接到“右”的可观察对象
*@param aScheduler
*调度程序用于运行“正确”的可观察对象,因为它总是需要这样做
*在新线程上运行。
*@param-aComparator
*用于比较两个输入值的比较器
*两个输入流的排序规则相同
*@param aResultSelector
*函数,该函数获取两个匹配结果并可以相应地处理它们。
*注意,如果不匹配,输入可以为空。
*/
公共JoinByComparisonOperator(
最终可观测正确,
最终调度程序作为调度程序,
最终比较器,
最终功能2结果选择器
)
{
subscriberRight=新的RightSubscriber();
比较器=比较器;
结果选择器=结果选择器;
正确的
.subscribeOn(ASScheduler)
.认购(认购权);
}
/**
*创建一个新的订户,该订户将被呼叫并依次传递任何呼叫。
* 
*@param aSubscriber
*@返回
*
*@see rx.functions.Func1#call(java.lang.Object)
*/
@凌驾

公共订阅者
所以,我最终的做法基本上是实现一个自定义的 `Operator`(操作符),它接收第二个 `Observable`,并在一个新线程上订阅它。自定义的订阅者读取数据并将其放入一个 `BlockingQueue`(阻塞队列),之后再从该队列中取出数据,与原始 `Observable` 的数据进行合并

如果有人遇到相同的情况,请参见:

import java.util.Comparator;
import java.util.Objects;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

import rx.Observable;
import rx.Scheduler;
import rx.Subscriber;
import rx.functions.Action1;
import rx.functions.Func2;

/**
 * This class is an operator which can be used to join two {@link Observable} streams,
 * by matching them up using a {@link Comparator}. The two streams need to be sorted
 * according to the rules of the {@link Comparator} for this to work.
 * <p>
 * If the main stream is empty this might never get invoked even if the right stream
 * has data.
 */
public class JoinByComparisonOperator<I, R> implements Observable.Operator<R, I>
{

   private final RightSubscriber<I> subscriberRight;

   private final Comparator<I> comparator;

   private final Func2<I, I, Observable<R>> resultSelector;

  /**
   * The constructor for this class.
   * <p>
   * Note that the "right" Observable is subscribed to immediately, inside this
   * constructor, and its values are buffered until the main stream consumes them.
   * <p>
   * @param aRight
   *     The observable that is joined to the "right"
   * @param aScheduler
   *     The scheduler used to run the "right" Observable as it always needs to
   *     run on a new thread.
   * @param aComparator
   *     The comparator used to compare two input values. This should follow the
   *     same rules by which the two input streams are sorted
   * @param aResultSelector
   *     Function that gets two matching results and can handle them accordingly.
   *     Note the inputs can be null in case there was no match.
   */
   public JoinByComparisonOperator(
      final Observable<I>              aRight,
      final Scheduler                  aScheduler,
      final Comparator<I>              aComparator,
      final Func2<I, I, Observable<R>> aResultSelector
   )
   {
      subscriberRight   = new RightSubscriber<> ();
      comparator        = aComparator;
      resultSelector    = aResultSelector;

      aRight
         .subscribeOn (aScheduler)
         .subscribe (subscriberRight);
   }

   /**
    * Creates a new subscriber that gets called and passes on any calls in turn.
    *
    * @param aSubscriber
    *     The downstream subscriber that receives the joined results.
    * @return
    *     The subscriber that consumes the main ("left") stream.
    * <p>
    * @see rx.functions.Func1#call(java.lang.Object)
    */
   @Override
   public Subscriber<? super I> call (final Subscriber<? super R> aSubscriber)
   {
      return new LeftSubscriber (aSubscriber);
   }


   /**
    * The subscriber for the "left" stream, which is the main stream we are operating
    * on. For every left value it advances through the buffered right values:
    * right values that sort before the left value are emitted as (null, right),
    * an equal right value is emitted together with the left value, and a right
    * value that sorts after the left value is kept for the following left values.
    */
   private class LeftSubscriber extends Subscriber<I>
   {

      final Subscriber<? super R> nextSubscriber;

      /** Look-ahead: a right value already taken from the buffer but not yet emitted. */
      private I nextRight;

      /** Set once a terminal event has been forwarded, so that it happens only once. */
      private boolean terminated;

      public LeftSubscriber (final Subscriber<? super R> aNextSubscriber)
      {
         nextSubscriber = aNextSubscriber;
      }

      /**
       * Runs the result selector for one pair and forwards everything it emits.
       * Either argument may be null when that side had no matching value.
       */
      private void selectResultInternal (I aLeft, I aRight)
      {
         resultSelector.call (aLeft, aRight).subscribe (new Action1<R>()
         {
            public void call (R aInput)
            {
               nextSubscriber.onNext (aInput);
            }
         });
      }

      /** Tells whether nothing more may be sent downstream. */
      private boolean isDone ()
      {
         return terminated || nextSubscriber.isUnsubscribed ();
      }

      /**
       * Forwards a failure of the right stream downstream, if there is one.
       *
       * @return true if the right stream failed and the join has been terminated.
       */
      private boolean failOnRightError ()
      {
         final Throwable myError = subscriberRight.getError ();

         if (myError == null)
         {
            return false;
         }

         terminated = true;
         nextSubscriber.onError (myError);
         unsubscribe ();

         return true;
      }

      /** Restores the interrupt flag and terminates the join with the given exception. */
      private void failOnInterrupt (InterruptedException aException)
      {
         Thread.currentThread ().interrupt ();
         onError (aException);
      }

      /**
       * The left stream is finished: emits all remaining right values as
       * (null, right) and then completes the downstream subscriber.
       */
      @Override
      public void onCompleted ()
      {
         if (isDone ())
         {
            return;
         }

         while (nextRight != null || !subscriberRight.isComplete ())
         {
            // A failed right stream never reports completion, so without this
            // check the loop would never end.
            if (isDone () || failOnRightError ())
            {
               return;
            }

            I myNext = nextRight;
            nextRight = null;

            if (myNext == null)
            {
               try
               {
                  myNext = subscriberRight.takeNext ();
               }
               catch (InterruptedException myException)
               {
                  failOnInterrupt (myException);
                  return;
               }
            }

            // A null here means the wait timed out; the loop condition decides
            // whether to wait again.
            if (myNext != null)
            {
               selectResultInternal (null, myNext);
            }
         }

         terminated = true;
         nextSubscriber.onCompleted ();
      }

      /**
       * Forwards the failure downstream (as an error, not as a completion) and
       * stops consuming the right stream.
       */
      @Override
      public void onError (Throwable aE)
      {
         if (!isDone ())
         {
            terminated = true;
            nextSubscriber.onError (aE);
         }

         subscriberRight.unsubscribe ();
      }

      /**
       * Joins one left value with the right stream and emits the result.
       */
      @Override
      public void onNext (I aInput)
      {
         if (isDone () || failOnRightError ())
         {
            return;
         }

         I myRight = null;

         // The look-ahead value has to be compared even after the right stream has
         // been drained completely, otherwise a pending match would be lost.
         while (nextRight != null || !subscriberRight.isComplete ())
         {
            if (nextRight == null)
            {
               try
               {
                  nextRight = subscriberRight.takeNext ();
               }
               catch (InterruptedException myException)
               {
                  failOnInterrupt (myException);
                  return;
               }

               if (nextRight == null)
               {
                  // Timed out: the right stream is slow, finished or failed. Do not
                  // treat this as "no match"; re-check the state and wait again.
                  if (nextSubscriber.isUnsubscribed () || failOnRightError ())
                  {
                     return;
                  }

                  continue;
               }
            }

            final int myComparison = Objects.compare (nextRight, aInput, comparator);

            if (myComparison > 0)
            {
               // The right side is ahead; keep the value for a later left value.
               break;
            }

            if (myComparison == 0)
            {
               myRight     = nextRight;
               nextRight   = null;
               break;
            }

            // The right value has no partner on the left side.
            selectResultInternal (null, nextRight);
            nextRight   = null;
         }

         selectResultInternal (aInput, myRight);
      }
   }

   /**
    * This class is intended to consume the "right" input stream and buffer the result
    * so it can be retrieved when processing the main stream. It is written to by the
    * thread running the right stream and read by the thread running the main stream.
    */
   private static class RightSubscriber<T> extends Subscriber<T>
   {

      /** Maximum number of right values held before the producer blocks. */
      private static final int BUFFER_CAPACITY = 1000;

      /** How long a single {@link #takeNext()} call waits for a value. */
      private static final long POLL_TIMEOUT_MILLIS = 100;

      // volatile: both flags are set by the producer and read by the consumer thread
      private volatile boolean complete = false;

      private volatile Throwable error = null;

      private final BlockingQueue<T> buffer = new ArrayBlockingQueue <> (BUFFER_CAPACITY);

      @Override
      public void onCompleted ()
      {
         complete = true;
      }

      @Override
      public void onError (Throwable aE)
      {
         error = aE;
      }

      /** Buffers the value, blocking while the buffer is full. */
      @Override
      public void onNext (T aT)
      {
         try {
            buffer.put (aT);
         }
         catch (InterruptedException myException) {
            // The value could not be buffered, so the join can no longer be correct.
            Thread.currentThread ().interrupt ();
            error = myException;
            unsubscribe ();
         }
      }

      /**
       * Takes the next buffered value, waiting a limited time for one to arrive.
       *
       * @return the next value, or null if none became available in time.
       * @throws InterruptedException if the waiting thread is interrupted.
       */
      public T takeNext() throws InterruptedException
      {
         return buffer.poll (POLL_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
      }

      /**
       * @return true once the right stream has completed and all of its values
       *     have been taken from the buffer.
       */
      public boolean isComplete()
      {
         // "complete" is read first: it is set after the last value was buffered.
         return complete && buffer.isEmpty ();
      }

      /**
       * @return the failure of the right stream, or null if it has not failed.
       */
      public Throwable getError()
      {
         return error;
      }
   }
}
import java.util.Comparator;
导入java.util.Objects;
导入java.util.concurrent.ArrayBlockingQueue;
导入java.util.concurrent.BlockingQueue;
导入java.util.concurrent.TimeUnit;
进口接收。可观察;
导入rx.Scheduler;
输入接收用户;
导入rx.functions.Action1;
导入rx.functions.Func2;
/**
*此类是一个运算符,可用于连接两个{@link Observable}流,
*通过使用{@link Comparator}将它们匹配起来。需要对这两个流进行排序
*根据{@link Comparator}的规则,这是可行的。
*
*如果主流为空,那么即使右侧的流中
*有数据,该运算符也可能永远不会被调用。
*/
公共类JoinByComparisonOperator实现Observable.Operator
{
私人最终权利认购人认购权;
私人最终比较人;
专用最终Func2结果选择器;
/**
*该类的构造函数。
*
*@param-aRight
*连接到“右”的可观察对象