Java 为什么筛选未排序的列表比筛选已排序的列表快
我一直在玩Java8Java 为什么筛选未排序的列表比筛选已排序的列表快,java,lambda,java-8,Java,Lambda,Java 8,我一直在玩Java8Streams-API,我决定使用microbenchmarkstream()和parallelStream()Streams。正如预期的那样,parallelStream()的速度是原来的两倍,但是出现了其他一些问题-如果我在将数据传递到filter之前对数据进行排序,那么传递filter->map->collect结果所需的时间比传递未排序的列表要多5-8倍 未分类 分类 这是密码 所以,我的问题是,为什么过滤未排序的列表要比过滤已排序的列表快 使用未排序列表时,所有元组
Streams-API
,我决定对stream()
和parallelStream()
流进行微基准测试(microbenchmark)。正如预期的那样,parallelStream()
的速度是原来的两倍,但是出现了其他一些问题-如果我在将数据传递到filter
之前对数据进行排序,那么传递filter->map->collect
结果所需的时间比传递未排序的列表要多5-8倍
未排序
已排序
这是代码
所以,我的问题是,为什么过滤未排序的列表要比过滤已排序的列表快
使用未排序列表时,所有元组都按内存顺序
访问。它们已在RAM中连续分配。CPU喜欢
按顺序访问内存,因为它们可以推测地请求
下一个缓存线,以便它在需要时始终存在
当你对列表进行排序时,你把它按随机顺序排列,因为
您的排序键是随机生成的。这意味着内存
对元组成员的访问是不可预测的。CPU无法预取
内存,对元组的几乎每次访问都是缓存未命中
这是GC内存管理的一个特定优势的
很好的例子:一起分配并且
一起使用的数据结构,表现非常好。它们具有良好的
引用局部性(locality of reference)
在这种情况下,缓存未命中的代价超过了
所节省的分支预测失败的代价
这个问题是公认的答案,也回答了我的问题:
当我创建原始
列表时
已排序-即它的元素顺序存储在内存中,执行时间没有差别,当列表
中填充了随机数时,它等于未排序的
版本。我假设您已多次迭代此基准,并计算了所给出数字的平均值和标准偏差。否则你的数字就是垃圾。你知道,电脑有一个调度程序,所以CPU时间和挂钟时间几乎永远不会匹配。我对确切的数字不感兴趣,我感兴趣的是为什么对数据进行排序会减慢处理速度。@AndreiAndrei - 我很熟悉这个问题,这正是我提问的原因——因为它展示了相反的行为。为什么这是重复的?这是关于缓存未命中的问题,而另一个问题是关于分支预测的问题。
(Stream) Elapsed time [ns] : 53733996 (53 ms)
(ParallelStream) Elapsed time [ns] : 25901907 (25 ms)
(Stream) Elapsed time [ns] : 336976149 (336 ms)
(ParallelStream) Elapsed time [ns] : 204781387 (204 ms)
package com.github.svetlinzarev.playground.javalang.lambda;
import static java.lang.Long.valueOf;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.stream.Collectors;
import com.github.svetlinzarev.playground.util.time.Stopwatch;
/**
 * Naive micro-benchmark that times a {@code filter -> map -> collect} pipeline
 * over a large list of random boxed integers, once with a sequential stream and
 * once with a parallel stream.
 */
public class MyFirstLambda {

    /** Number of elements generated by {@code main}; also the exclusive upper bound of the generated values. */
    private static final int ELEMENTS = 1024 * 1024 * 16;

    /** Elements strictly smaller than this value pass the filter. */
    private static final int SMALLER_THAN = 16;

    /** Number of times {@code main} repeats the sequential/parallel benchmark pair. */
    private static final int RUNS = 5;

    /**
     * Builds a list of random non-negative integers.
     *
     * @param nElements number of elements to generate
     * @return a mutable list holding {@code nElements} values drawn from {@code [0, ELEMENTS)}
     */
    private static List<Integer> getRandom(int nElements) {
        final Random random = new Random();
        final List<Integer> data = new ArrayList<>(nElements);
        // Loop over the requested size; the list is presized with the same value.
        for (int i = 0; i < nElements; i++) {
            data.add(random.nextInt(ELEMENTS));
        }
        return data;
    }

    /**
     * Runs the pipeline on a sequential stream, logs the elapsed time under the
     * label "Stream" and prints the collected values.
     *
     * @param data the list to filter
     */
    private static void benchStream(List<Integer> data) {
        // NOTE(review): Stopwatch is assumed to start timing on construction - confirm.
        final Stopwatch stopwatch = new Stopwatch();
        final List<Long> smallLongs = data.stream()
                .filter(i -> i.intValue() < SMALLER_THAN)
                .map(Long::valueOf)
                .collect(Collectors.toList());
        stopwatch.log("Stream");
        System.out.println(smallLongs);
    }

    /**
     * Runs the pipeline on a parallel stream, logs the elapsed time under the
     * label "ParallelStream" and prints the collected values.
     *
     * @param data the list to filter
     */
    private static void benchParallelStream(List<Integer> data) {
        // NOTE(review): Stopwatch is assumed to start timing on construction - confirm.
        final Stopwatch stopwatch = new Stopwatch();
        final List<Long> smallLongs = data.parallelStream()
                .filter(i -> i.intValue() < SMALLER_THAN)
                .map(Long::valueOf)
                .collect(Collectors.toList());
        stopwatch.log("ParallelStream");
        System.out.println(smallLongs);
    }

    /**
     * Generates the data once and runs both benchmarks {@code RUNS} times.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final List<Integer> data = getRandom(ELEMENTS);
        // data.sort((first, second) -> first.compareTo(second)); //<- Sort the data
        for (int run = 0; run < RUNS; run++) {
            benchStream(data);
            benchParallelStream(data);
        }
    }
}
package com.github.svetlinzarev.playground.javalang.lambda;
import static java.lang.Long.valueOf;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.stream.Collectors;
import com.github.svetlinzarev.playground.util.time.Stopwatch;
/**
 * Naive micro-benchmark that compares sequential and parallel stream pipelines
 * (filter/map/collect and filter/count) over the same list of boxed integers in
 * three states: unsorted, sorted ascending and sorted descending.
 */
public class MyFirstLambda {

    /** Number of elements generated by {@code main}; also the exclusive upper bound of the generated values. */
    private static final int ELEMENTS = 1024 * 1024 * 10;

    /** Elements strictly smaller than this value pass the filter. */
    private static final int SMALLER_THAN = 16;

    /** Number of times every pipeline is executed before any measurement is taken. */
    private static final int WARM_UP_ITERATIONS = 1000;

    /** Pause after each explicit GC request, in milliseconds. */
    private static final long GC_SETTLE_MILLIS = 1000;

    /**
     * Builds a list of random non-negative integers.
     *
     * @param nElements number of elements to generate
     * @return a mutable list holding {@code nElements} values drawn from {@code [0, ELEMENTS)}
     */
    private static List<Integer> getRandom(int nElements) {
        final Random random = new Random();
        final List<Integer> data = new ArrayList<>(nElements);
        // Loop over the requested size; the list is presized with the same value.
        for (int i = 0; i < nElements; i++) {
            data.add(random.nextInt(ELEMENTS));
        }
        return data;
    }

    /**
     * Filters the small values with a sequential stream and converts them to {@code Long}.
     *
     * @param data the list to filter
     * @return the values below {@code SMALLER_THAN}, as longs
     */
    private static List<Long> filterStream(List<Integer> data) {
        return data.stream()
                .filter(i -> i.intValue() < SMALLER_THAN)
                .map(Long::valueOf)
                .collect(Collectors.toList());
    }

    /**
     * Filters the small values with a parallel stream and converts them to {@code Long}.
     *
     * @param data the list to filter
     * @return the values below {@code SMALLER_THAN}, as longs
     */
    private static List<Long> filterParallelStream(List<Integer> data) {
        return data.parallelStream()
                .filter(i -> i.intValue() < SMALLER_THAN)
                .map(Long::valueOf)
                .collect(Collectors.toList());
    }

    /**
     * Counts the small values with a sequential stream.
     *
     * @param data the list to scan
     * @return how many elements are below {@code SMALLER_THAN}
     */
    private static long filterAndCount(List<Integer> data) {
        return data.stream()
                .filter(i -> i.intValue() < SMALLER_THAN)
                .count();
    }

    /**
     * Counts the small values with a parallel stream.
     *
     * @param data the list to scan
     * @return how many elements are below {@code SMALLER_THAN}
     */
    private static long filterAndCountinParallel(List<Integer> data) {
        return data.parallelStream()
                .filter(i -> i.intValue() < SMALLER_THAN)
                .count();
    }

    /**
     * Executes every pipeline {@code WARM_UP_ITERATIONS} times and discards the results.
     *
     * @param data the list the pipelines run on
     */
    private static void warmUp(List<Integer> data) {
        for (int i = 0; i < WARM_UP_ITERATIONS; i++) {
            filterStream(data);
            filterParallelStream(data);
            filterAndCount(data);
            filterAndCountinParallel(data);
        }
    }

    /**
     * Requests a garbage collection and then sleeps.
     *
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    private static void settleGc() throws InterruptedException {
        System.gc();
        Thread.sleep(GC_SETTLE_MILLIS); // Give it enough time to complete the GC cycle
    }

    /**
     * Times each of the four pipelines once, with a GC request and a pause before
     * every measurement, and logs each elapsed time tagged with {@code message}.
     *
     * @param data    the list the pipelines run on
     * @param message label appended to every log entry (e.g. the ordering of {@code data})
     * @throws InterruptedException if the thread is interrupted during a pause
     */
    private static void benchmark(List<Integer> data, String message) throws InterruptedException {
        settleGc();
        // NOTE(review): Stopwatch is assumed to start timing on construction and on reset() - confirm.
        final Stopwatch stopwatch = new Stopwatch();
        filterStream(data);
        stopwatch.log("Stream: " + message);

        settleGc();
        stopwatch.reset();
        filterParallelStream(data);
        stopwatch.log("ParallelStream: " + message);

        settleGc();
        stopwatch.reset();
        filterAndCount(data);
        stopwatch.log("Count: " + message);

        settleGc();
        stopwatch.reset();
        // Measure the parallel variant here, so the label matches what is timed.
        filterAndCountinParallel(data);
        stopwatch.log("Count in parallel: " + message);
    }

    /**
     * Generates the data, warms up, then benchmarks the list unsorted, sorted
     * ascending and sorted descending. The same list instance is sorted in place
     * between the runs.
     *
     * @param args ignored
     * @throws InterruptedException if the thread is interrupted during a pause
     */
    public static void main(String[] args) throws InterruptedException {
        final List<Integer> data = getRandom(ELEMENTS);
        warmUp(data);
        benchmark(data, "UNSORTED");
        Collections.sort(data);
        benchmark(data, "SORTED");
        Collections.sort(data, Collections.reverseOrder());
        benchmark(data, "IN REVERSE ORDER");
    }
}
16:09:20.470 [main] INFO c.g.s.playground.util.time.Stopwatch - (Stream: UNSORTED) Elapsed time [ns] : 66812263 (66 ms)
16:09:22.149 [main] INFO c.g.s.playground.util.time.Stopwatch - (ParallelStream: UNSORTED) Elapsed time [ns] : 39580682 (39 ms)
16:09:23.875 [main] INFO c.g.s.playground.util.time.Stopwatch - (Count: UNSORTED) Elapsed time [ns] : 97852866 (97 ms)
16:09:25.537 [main] INFO c.g.s.playground.util.time.Stopwatch - (Count in parallel: UNSORTED) Elapsed time [ns] : 94884189 (94 ms)
16:09:35.608 [main] INFO c.g.s.playground.util.time.Stopwatch - (Stream: SORTED) Elapsed time [ns] : 361717676 (361 ms)
16:09:38.439 [main] INFO c.g.s.playground.util.time.Stopwatch - (ParallelStream: SORTED) Elapsed time [ns] : 150115808 (150 ms)
16:09:41.308 [main] INFO c.g.s.playground.util.time.Stopwatch - (Count: SORTED) Elapsed time [ns] : 338335743 (338 ms)
16:09:44.209 [main] INFO c.g.s.playground.util.time.Stopwatch - (Count in parallel: SORTED) Elapsed time [ns] : 370968432 (370 ms)
16:09:50.693 [main] INFO c.g.s.playground.util.time.Stopwatch - (Stream: IN REVERSE ORDER) Elapsed time [ns] : 352036140 (352 ms)
16:09:53.323 [main] INFO c.g.s.playground.util.time.Stopwatch - (ParallelStream: IN REVERSE ORDER) Elapsed time [ns] : 151044664 (151 ms)
16:09:56.159 [main] INFO c.g.s.playground.util.time.Stopwatch - (Count: IN REVERSE ORDER) Elapsed time [ns] : 359281197 (359 ms)
16:09:58.991 [main] INFO c.g.s.playground.util.time.Stopwatch - (Count in parallel: IN REVERSE ORDER) Elapsed time [ns] : 353177542 (353 ms)