Java 如何在两个或多个流上执行外部联接

Java 如何在两个或多个流上执行外部联接,java,join,java-stream,Java,Join,Java Stream,在我的应用程序中,我使用了几个提供表单元素(ID、value)的流。元素由以下类定义: static final class Element<T> implements Comparable<Element<T>> { final long id; final T value; Element(int id, T value) { this.id = id; this.value = value;

在我的应用程序中,我使用了多个流,它们提供形如 (ID, value) 的元素。元素由以下类定义:

/**
 * An (id, value) pair delivered by one of the input streams.
 *
 * <p>Elements are ordered by {@code id} only; {@code value} takes no part in
 * the comparison.
 *
 * @param <T> type of the payload carried by the element
 */
static final class Element<T> implements Comparable<Element<T>> {
    final long id;
    final T value;

    /**
     * Creates an element.
     *
     * @param id    identifier used for ordering; declared {@code long} to match
     *              the field, so ids beyond the {@code int} range can be built
     *              ({@code int} arguments still widen implicitly)
     * @param value payload associated with {@code id}
     */
    Element(long id, T value) {
        this.id = id;
        this.value = value;
    }

    /**
     * Orders elements by ascending {@code id}.
     *
     * @param o the element to compare against
     * @return negative, zero or positive as this id is less than, equal to or
     *         greater than {@code o.id}
     */
    @Override
    public int compareTo(Element<T> o) {
        // Parameterized (not raw) type keeps the override free of unchecked warnings.
        return Long.compare(id, o.id);
    }
}
到目前为止,我对 Java Stream API 的经验还很基础,所以通常用迭代器来完成这类任务。

有没有一种惯用(且高效)的方法,可以用流来执行这种连接?有没有可以直接使用的工具库?


旁注:示例已简化。应用程序从一个类似于面向列的数据存储(不是真正的 DBMS)接收数据,数据量达数 GB,难以全部放入内存。该存储也没有内置对这种连接操作的支持。

最简单的解决方案是编写迭代器,然后使用StreamSupport::stream从迭代器创建流。但是,如果要使用并行流,您可能会发现性能方面存在一些问题。

最简单的解决方案是编写迭代器,然后使用StreamSupport::stream从迭代器创建流。但是,如果要使用并行流,您可能会发现性能方面存在一些问题。

为了构建完整的外部连接流实现,我使用了两个阻塞队列。每个流各关联一个队列,并由一个填充类(Runnable 的实现)从流中读取数据并写入该队列。当填充类读完数据后,它会向队列写入流结束标记。然后,我基于 AbstractSpliterator 构造了一个 Spliterator。其 tryAdvance 方法从左队列和右队列各取一个值,并根据比较结果决定消费还是保留这些值。我使用了元素类的一个变体。请参阅以下代码:

import java.util.ArrayList;
import java.util.Collection;

/**
 * An identified row travelling through the join.
 *
 * <p>A source element carries exactly one value. A joined element carries the
 * values of its left and right inputs in that order, with a single
 * {@code null} standing in for a missing side. Elements are ordered by
 * {@code id} only.
 *
 * @param <T> type of the values carried by the element
 */
public final class Element<T> implements Comparable<Element<T>> {
    final long id;
    final Collection<T> value;

    /**
     * Creates a source element holding a single value.
     *
     * @param id    identifier used for ordering and matching; declared
     *              {@code long} to match the field and the join constructor, so
     *              ids beyond the {@code int} range are representable
     *              ({@code int} arguments still widen implicitly)
     * @param value the single value of this element (may be {@code null})
     */
    public Element(long id, T value) {
        this.id = id;
        // Order preserving
        this.value = new ArrayList<T>();
        this.value.add(value);
    }

    /**
     * Creates a joined element from a left and a right input.
     *
     * @param id identifier of the joined row
     * @param e1 left input, or {@code null} when the left side has no match
     * @param e2 right input, or {@code null} when the right side has no match
     */
    Element(long id, Element<T> e1, Element<T> e2) {
        this.id = id;
        this.value = new ArrayList<T>();
        add(e1);
        add(e2);
    }

    /**
     * Appends the values of {@code e1}, or one {@code null} placeholder when
     * {@code e1} itself is {@code null}.
     */
    private void add(Element<T> e1) {
        if (e1 == null) {
            this.value.add(null);
        } else {
            this.value.addAll(e1.value);
        }
    }

    /**
     * Used as End-of-Stream marker: id is {@code -1} and {@code value} is
     * {@code null}.
     */
    Element() {
        id = -1;
        value = null;
    }

    /**
     * Orders elements by ascending {@code id}.
     *
     * @param o the element to compare against
     * @return negative, zero or positive as this id is less than, equal to or
     *         greater than {@code o.id}
     */
    @Override
    public int compareTo(Element<T> o) {
        return Long.compare(id, o.id);
    }
}
import java.util.ArrayList;
导入java.util.Collection;
公共final类元素实现了可比较的{
最终长id;
最终收款价值;
公共元素(int id,T值){
this.id=id;
//保序
this.value=新的ArrayList();
这个.价值.增加(价值);
}
元素(长id、元素e1、元素e2){
this.id=id;
this.value=新的ArrayList();
添加(e1);
添加(e2);
}
专用无效添加(元素e1){
if(e1==null){
this.value.add(null);
}否则{
this.value.addAll(e1.value);
}
}
/**
*用作流结束标记
*/
元素(){
id=-1;
值=空;
}
@凌驾
公共整数比较(元素o){
返回Long.compare(id,o.id);
}
}
连接(join)的实现

import java.util.Comparator;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * Spliterator that merges two streams of {@link Element}s into one stream of
 * joined elements (full outer join on {@code id}).
 *
 * <p>Each input stream is drained by its own filler thread into a bounded
 * {@link BlockingQueue}; {@link #tryAdvance} takes the head of each queue,
 * emits the smaller one (or both when their ids are equal) and keeps the
 * other head for the next call. The merge therefore only pairs equal ids
 * correctly when both inputs arrive in ascending id order.
 *
 * @param <T> type of the values carried by the elements
 */
public  class OuterJoinSpliterator<T> extends Spliterators.AbstractSpliterator<Element<T>> {

    /**
     * Copies one source stream into its queue and then enqueues
     * {@link OuterJoinSpliterator#EOS}.
     */
    private final class Filler implements Runnable {
        private final Stream<Element<T>> stream;
        private final BlockingQueue<Element<T>> queue;

        private Filler(Stream<Element<T>> stream, BlockingQueue<Element<T>> queue) {
            this.stream = stream;
            this.queue = queue;
        }

        @Override
        public void run() {
            try {
                // forEachOrdered: the merge relies on encounter order, which
                // plain forEach does not guarantee for parallel sources.
                stream.forEachOrdered(this::enqueue);
            } finally {
                // Always terminate the queue, even when the source stream
                // throws; otherwise the consumer would block in take() forever.
                signalEndOfStream();
            }
        }

        /**
         * Blocks until {@code element} fits into the queue. An interrupt aborts
         * the whole traversal instead of silently dropping this one element.
         */
        private void enqueue(Element<T> element) {
            try {
                queue.put(element);
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IllegalStateException("Interrupted while queueing element " + element.id, e);
            }
        }

        /** Enqueues the end-of-stream marker (best effort when interrupted). */
        private void signalEndOfStream() {
            // put() fails immediately while the interrupt flag is set, so clear
            // it for the duration of the call and restore it afterwards.
            final boolean wasInterrupted = Thread.interrupted();
            try {
                queue.put(EOS);
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
            } finally {
                if (wasInterrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /** End-of-stream marker; always compared by identity, never by id. */
    public final Element<T> EOS = new Element<T>();
    private final int queueSize;
    private final BlockingQueue<Element<T>> leftQueue;
    private final BlockingQueue<Element<T>> rightQueue;
    /** Pending head of the left input; {@code null} means "fetch the next one". */
    protected Element<T> leftValue;
    /** Pending head of the right input; {@code null} means "fetch the next one". */
    protected Element<T> rightValue;

    /**
     * Creates the queues and starts one filler thread per input stream.
     *
     * @param estSize         estimated size passed to the superclass
     * @param characteristics spliterator characteristics passed to the superclass
     * @param queueSize       capacity of each of the two queues
     * @param leftStream      left input
     * @param rightStream     right input
     */
    private OuterJoinSpliterator(long estSize, int characteristics, int queueSize,
            Stream<Element<T>> leftStream, Stream<Element<T>> rightStream) {
        super(estSize, characteristics);
        this.queueSize = queueSize;
        leftQueue = createQueue();
        rightQueue = createQueue();
        createFillerThread(leftStream, leftQueue).start();
        createFillerThread(rightStream, rightQueue).start();
    }

    /** Builds the joined element for {@code id}; a {@code null} side marks a missing match. */
    private Element<T> acceptBoth(long id, Element<T> left, Element<T> right) {
        return new Element<T>(id, left, right);
    }

    /** Builds a joined element that has only a left side. */
    private final Element<T> acceptLeft(Element<T> left) {
        return acceptBoth(left.id, left, null);
    }

    /** Builds a joined element that has only a right side. */
    private final Element<T> acceptRight(Element<T> right) {
        return acceptBoth(right.id, null, right);
    }

    /** Creates (but does not start) the thread that fills {@code queue} from {@code stream}. */
    private final Thread createFillerThread(Stream<Element<T>> stream, BlockingQueue<Element<T>> queue) {
        final Thread filler = new Thread(new Filler(stream, queue));
        // Daemon: a filler blocked on a full queue (e.g. after the joined stream
        // was short-circuited) must not keep the JVM alive.
        filler.setDaemon(true);
        return filler;
    }

    /** Creates a bounded queue with capacity {@code queueSize}. */
    private final ArrayBlockingQueue<Element<T>> createQueue() {
        return new ArrayBlockingQueue<>(queueSize);
    }

    /**
     * Always returns {@code null}.
     *
     * @return {@code null}
     */
    @Override
    public Comparator<? super Element<T>> getComparator() {
        return null;
    }

    /** Returns {@code true} once both inputs have delivered their end-of-stream marker. */
    private final boolean isFinished() {
        return leftValue == EOS && rightValue == EOS;
    }

    /**
     * Emits the next joined element, if any.
     *
     * @param action receives the joined element
     * @return {@code false} when both inputs are exhausted or the calling thread
     *         was interrupted while waiting for input (the interrupt flag is
     *         restored in that case); {@code true} otherwise
     */
    @Override
    public final boolean tryAdvance(Consumer<? super Element<T>> action) {
        try {
            updateLeft();
            updateRight();
        } catch (final InterruptedException e) {
            // Preserve the interrupt for the caller instead of swallowing it.
            Thread.currentThread().interrupt();
            return false;
        }

        if (isFinished()) {
            return false;
        }

        if (leftValue == EOS) {
            action.accept(acceptRight(rightValue));
            rightValue = null;
        } else if (rightValue == EOS) {
            action.accept(acceptLeft(leftValue));
            leftValue = null;
        } else {
            // Test only the sign: Comparable does not promise exactly -1/0/1.
            final int order = leftValue.compareTo(rightValue);
            if (order < 0) {
                action.accept(acceptLeft(leftValue));
                leftValue = null;
            } else if (order > 0) {
                action.accept(acceptRight(rightValue));
                rightValue = null;
            } else {
                action.accept(acceptBoth(leftValue.id, leftValue, rightValue));
                leftValue = null;
                rightValue = null;
            }
        }
        return true;
    }

    /** Fetches the next left element when the previous one has been consumed. */
    private final void updateLeft() throws InterruptedException {
        if (leftValue == null) {
            leftValue = leftQueue.take();
        }
    }

    /** Fetches the next right element when the previous one has been consumed. */
    private final void updateRight() throws InterruptedException {
        if (rightValue == null) {
            rightValue = rightQueue.take();
        }
    }

    /**
     * Creates a stream of joined elements over the two inputs.
     *
     * @param estSize         estimated number of joined elements
     * @param characteristics characteristics reported by the spliterator
     * @param queueSize       capacity of the queue buffering each input
     * @param parallel        whether the returned stream is parallel
     * @param leftStream      left input
     * @param rightStream     right input
     * @param <T>             type of the values carried by the elements
     * @return a stream backed by a new {@code OuterJoinSpliterator}
     */
    public static <T> Stream<Element<T>> join(long estSize, int characteristics, int queueSize, boolean parallel, Stream<Element<T>> leftStream, Stream<Element<T>> rightStream) {
        Spliterator<Element<T>> spliterator = new OuterJoinSpliterator<>(estSize, characteristics, queueSize, leftStream, rightStream);
        return StreamSupport.stream(spliterator, parallel);
    }
}
import java.util.Comparator;
导入java.util.Spliterator;
导入java.util.Spliterators;
导入java.util.concurrent.ArrayBlockingQueue;
导入java.util.concurrent.BlockingQueue;
导入java.util.function.Consumer;
导入java.util.stream.stream;
导入java.util.stream.StreamSupport;
公共类OuterJoinSpliterator扩展Spliterators.AbstractSpliterator{
私有最终类填充器实现可运行{
私有最终流;
私有最终阻塞队列;
专用填充程序(流、阻塞队列){
this.stream=流;
this.queue=队列;
}
@凌驾
公开募捐{
stream.forEach(x->{
试一试{
队列。put(x);
}捕获(最终中断异常e){
e、 printStackTrace();
}
});
试一试{
队列放置(EOS);
}捕获(最终中断异常e){
e、 printStackTrace();
}
}
}
公共最终元素EOS=新元素();
私有最终整数队列大小;
私有最终阻塞队列leftQueue;
私有最终阻塞队列rightQueue;
保护元素左值;
受保护元素值;
专用OuterJoinSpliterator(长estSize、int-characteristics、int-queueSize、,
流左流、流右流){
超级(大小、特性);
this.queueSize=queueSize;
leftQueue=createQueue();
rightQueue=createQueue();
createFillerThread(leftStream,leftQueue).start();
createFillerThread(rightStream,rightQueue).start();
}
私有元素acceptBoth(长id、元素左、元素右){
返回新元素(id、左、右);
}
私有最终元素acceptLeft(元素左){
返回acceptBoth(left.id,left,null);
}
私有最终元素接受权限(元素权限){
返回acceptBoth(right.id,null,right);
}
私有最终线程createFillerThread(流leftStream、BlockingQueue队列){
返回新线程(新填充(leftStream,queue));
}
私有最终ArrayBlockingQueue createQueue(){
返回新的ArrayBlockingQueue(queueSize);
}
@凌驾

公共比较器为了构造一个完整的外部连接流实现,我使用了两个阻塞队列。一个队列与每个流和一个填充类(可运行的实现)相关联从流中读取数据并将其写入队列。当filler类耗尽数据时,它会将流结束标记写入队列。然后,我从AbstractSpliterator中构造一个拆分器。tryAdvance方法实现从左队列和右队列中获取一个值,并根据比较使用或保留这些值或结果。我使用元素类的变体。请参见以下代码:

import java.util.ArrayList;
import java.util.Collection;

/**
 * An identified row travelling through the join.
 *
 * <p>A source element carries exactly one value. A joined element carries the
 * values of its left and right inputs in that order, with a single
 * {@code null} standing in for a missing side. Elements are ordered by
 * {@code id} only.
 *
 * @param <T> type of the values carried by the element
 */
public final class Element<T> implements Comparable<Element<T>> {
    final long id;
    final Collection<T> value;

    /**
     * Creates a source element holding a single value.
     *
     * @param id    identifier used for ordering and matching; declared
     *              {@code long} to match the field and the join constructor, so
     *              ids beyond the {@code int} range are representable
     *              ({@code int} arguments still widen implicitly)
     * @param value the single value of this element (may be {@code null})
     */
    public Element(long id, T value) {
        this.id = id;
        // Order preserving
        this.value = new ArrayList<T>();
        this.value.add(value);
    }

    /**
     * Creates a joined element from a left and a right input.
     *
     * @param id identifier of the joined row
     * @param e1 left input, or {@code null} when the left side has no match
     * @param e2 right input, or {@code null} when the right side has no match
     */
    Element(long id, Element<T> e1, Element<T> e2) {
        this.id = id;
        this.value = new ArrayList<T>();
        add(e1);
        add(e2);
    }

    /**
     * Appends the values of {@code e1}, or one {@code null} placeholder when
     * {@code e1} itself is {@code null}.
     */
    private void add(Element<T> e1) {
        if (e1 == null) {
            this.value.add(null);
        } else {
            this.value.addAll(e1.value);
        }
    }

    /**
     * Used as End-of-Stream marker: id is {@code -1} and {@code value} is
     * {@code null}.
     */
    Element() {
        id = -1;
        value = null;
    }

    /**
     * Orders elements by ascending {@code id}.
     *
     * @param o the element to compare against
     * @return negative, zero or positive as this id is less than, equal to or
     *         greater than {@code o.id}
     */
    @Override
    public int compareTo(Element<T> o) {
        return Long.compare(id, o.id);
    }
}
import java.util.ArrayList;
导入java.util.Collection;
公共final类元素实现了可比较的{
最终长id;
最终收款价值;
公共元素(int id,T值){
this.id=id;
//保序
this.value=新的ArrayList();
这个.价值.增加(价值);
}
元素(长id、元素e1、元素e2){
this.id=id;
这
1, "red",   null, 87.9
2, "green", 28,   21.0
3, null,    9,    107
4, "red"    17,   null
6, "blue",  11,   null
import java.util.ArrayList;
import java.util.Collection;

/**
 * An identified row travelling through the join.
 *
 * <p>A source element carries exactly one value. A joined element carries the
 * values of its left and right inputs in that order, with a single
 * {@code null} standing in for a missing side. Elements are ordered by
 * {@code id} only.
 *
 * @param <T> type of the values carried by the element
 */
public final class Element<T> implements Comparable<Element<T>> {
    final long id;
    final Collection<T> value;

    /**
     * Creates a source element holding a single value.
     *
     * @param id    identifier used for ordering and matching; declared
     *              {@code long} to match the field and the join constructor, so
     *              ids beyond the {@code int} range are representable
     *              ({@code int} arguments still widen implicitly)
     * @param value the single value of this element (may be {@code null})
     */
    public Element(long id, T value) {
        this.id = id;
        // Order preserving
        this.value = new ArrayList<T>();
        this.value.add(value);
    }

    /**
     * Creates a joined element from a left and a right input.
     *
     * @param id identifier of the joined row
     * @param e1 left input, or {@code null} when the left side has no match
     * @param e2 right input, or {@code null} when the right side has no match
     */
    Element(long id, Element<T> e1, Element<T> e2) {
        this.id = id;
        this.value = new ArrayList<T>();
        add(e1);
        add(e2);
    }

    /**
     * Appends the values of {@code e1}, or one {@code null} placeholder when
     * {@code e1} itself is {@code null}.
     */
    private void add(Element<T> e1) {
        if (e1 == null) {
            this.value.add(null);
        } else {
            this.value.addAll(e1.value);
        }
    }

    /**
     * Used as End-of-Stream marker: id is {@code -1} and {@code value} is
     * {@code null}.
     */
    Element() {
        id = -1;
        value = null;
    }

    /**
     * Orders elements by ascending {@code id}.
     *
     * @param o the element to compare against
     * @return negative, zero or positive as this id is less than, equal to or
     *         greater than {@code o.id}
     */
    @Override
    public int compareTo(Element<T> o) {
        return Long.compare(id, o.id);
    }
}
import java.util.Comparator;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * Spliterator that merges two streams of {@link Element}s into one stream of
 * joined elements (full outer join on {@code id}).
 *
 * <p>Each input stream is drained by its own filler thread into a bounded
 * {@link BlockingQueue}; {@link #tryAdvance} takes the head of each queue,
 * emits the smaller one (or both when their ids are equal) and keeps the
 * other head for the next call. The merge therefore only pairs equal ids
 * correctly when both inputs arrive in ascending id order.
 *
 * @param <T> type of the values carried by the elements
 */
public  class OuterJoinSpliterator<T> extends Spliterators.AbstractSpliterator<Element<T>> {

    /**
     * Copies one source stream into its queue and then enqueues
     * {@link OuterJoinSpliterator#EOS}.
     */
    private final class Filler implements Runnable {
        private final Stream<Element<T>> stream;
        private final BlockingQueue<Element<T>> queue;

        private Filler(Stream<Element<T>> stream, BlockingQueue<Element<T>> queue) {
            this.stream = stream;
            this.queue = queue;
        }

        @Override
        public void run() {
            try {
                // forEachOrdered: the merge relies on encounter order, which
                // plain forEach does not guarantee for parallel sources.
                stream.forEachOrdered(this::enqueue);
            } finally {
                // Always terminate the queue, even when the source stream
                // throws; otherwise the consumer would block in take() forever.
                signalEndOfStream();
            }
        }

        /**
         * Blocks until {@code element} fits into the queue. An interrupt aborts
         * the whole traversal instead of silently dropping this one element.
         */
        private void enqueue(Element<T> element) {
            try {
                queue.put(element);
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IllegalStateException("Interrupted while queueing element " + element.id, e);
            }
        }

        /** Enqueues the end-of-stream marker (best effort when interrupted). */
        private void signalEndOfStream() {
            // put() fails immediately while the interrupt flag is set, so clear
            // it for the duration of the call and restore it afterwards.
            final boolean wasInterrupted = Thread.interrupted();
            try {
                queue.put(EOS);
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
            } finally {
                if (wasInterrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /** End-of-stream marker; always compared by identity, never by id. */
    public final Element<T> EOS = new Element<T>();
    private final int queueSize;
    private final BlockingQueue<Element<T>> leftQueue;
    private final BlockingQueue<Element<T>> rightQueue;
    /** Pending head of the left input; {@code null} means "fetch the next one". */
    protected Element<T> leftValue;
    /** Pending head of the right input; {@code null} means "fetch the next one". */
    protected Element<T> rightValue;

    /**
     * Creates the queues and starts one filler thread per input stream.
     *
     * @param estSize         estimated size passed to the superclass
     * @param characteristics spliterator characteristics passed to the superclass
     * @param queueSize       capacity of each of the two queues
     * @param leftStream      left input
     * @param rightStream     right input
     */
    private OuterJoinSpliterator(long estSize, int characteristics, int queueSize,
            Stream<Element<T>> leftStream, Stream<Element<T>> rightStream) {
        super(estSize, characteristics);
        this.queueSize = queueSize;
        leftQueue = createQueue();
        rightQueue = createQueue();
        createFillerThread(leftStream, leftQueue).start();
        createFillerThread(rightStream, rightQueue).start();
    }

    /** Builds the joined element for {@code id}; a {@code null} side marks a missing match. */
    private Element<T> acceptBoth(long id, Element<T> left, Element<T> right) {
        return new Element<T>(id, left, right);
    }

    /** Builds a joined element that has only a left side. */
    private final Element<T> acceptLeft(Element<T> left) {
        return acceptBoth(left.id, left, null);
    }

    /** Builds a joined element that has only a right side. */
    private final Element<T> acceptRight(Element<T> right) {
        return acceptBoth(right.id, null, right);
    }

    /** Creates (but does not start) the thread that fills {@code queue} from {@code stream}. */
    private final Thread createFillerThread(Stream<Element<T>> stream, BlockingQueue<Element<T>> queue) {
        final Thread filler = new Thread(new Filler(stream, queue));
        // Daemon: a filler blocked on a full queue (e.g. after the joined stream
        // was short-circuited) must not keep the JVM alive.
        filler.setDaemon(true);
        return filler;
    }

    /** Creates a bounded queue with capacity {@code queueSize}. */
    private final ArrayBlockingQueue<Element<T>> createQueue() {
        return new ArrayBlockingQueue<>(queueSize);
    }

    /**
     * Always returns {@code null}.
     *
     * @return {@code null}
     */
    @Override
    public Comparator<? super Element<T>> getComparator() {
        return null;
    }

    /** Returns {@code true} once both inputs have delivered their end-of-stream marker. */
    private final boolean isFinished() {
        return leftValue == EOS && rightValue == EOS;
    }

    /**
     * Emits the next joined element, if any.
     *
     * @param action receives the joined element
     * @return {@code false} when both inputs are exhausted or the calling thread
     *         was interrupted while waiting for input (the interrupt flag is
     *         restored in that case); {@code true} otherwise
     */
    @Override
    public final boolean tryAdvance(Consumer<? super Element<T>> action) {
        try {
            updateLeft();
            updateRight();
        } catch (final InterruptedException e) {
            // Preserve the interrupt for the caller instead of swallowing it.
            Thread.currentThread().interrupt();
            return false;
        }

        if (isFinished()) {
            return false;
        }

        if (leftValue == EOS) {
            action.accept(acceptRight(rightValue));
            rightValue = null;
        } else if (rightValue == EOS) {
            action.accept(acceptLeft(leftValue));
            leftValue = null;
        } else {
            // Test only the sign: Comparable does not promise exactly -1/0/1.
            final int order = leftValue.compareTo(rightValue);
            if (order < 0) {
                action.accept(acceptLeft(leftValue));
                leftValue = null;
            } else if (order > 0) {
                action.accept(acceptRight(rightValue));
                rightValue = null;
            } else {
                action.accept(acceptBoth(leftValue.id, leftValue, rightValue));
                leftValue = null;
                rightValue = null;
            }
        }
        return true;
    }

    /** Fetches the next left element when the previous one has been consumed. */
    private final void updateLeft() throws InterruptedException {
        if (leftValue == null) {
            leftValue = leftQueue.take();
        }
    }

    /** Fetches the next right element when the previous one has been consumed. */
    private final void updateRight() throws InterruptedException {
        if (rightValue == null) {
            rightValue = rightQueue.take();
        }
    }

    /**
     * Creates a stream of joined elements over the two inputs.
     *
     * @param estSize         estimated number of joined elements
     * @param characteristics characteristics reported by the spliterator
     * @param queueSize       capacity of the queue buffering each input
     * @param parallel        whether the returned stream is parallel
     * @param leftStream      left input
     * @param rightStream     right input
     * @param <T>             type of the values carried by the elements
     * @return a stream backed by a new {@code OuterJoinSpliterator}
     */
    public static <T> Stream<Element<T>> join(long estSize, int characteristics, int queueSize, boolean parallel, Stream<Element<T>> leftStream, Stream<Element<T>> rightStream) {
        Spliterator<Element<T>> spliterator = new OuterJoinSpliterator<>(estSize, characteristics, queueSize, leftStream, rightStream);
        return StreamSupport.stream(spliterator, parallel);
    }
}