Java 排序时会出现非常奇怪的效率怪癖_Java_Algorithm_Performance_Sorting_Insertion Sort

Java 排序时会出现非常奇怪的效率怪癖

java algorithm performance sorting

Java 排序时会出现非常奇怪的效率怪癖,java,algorithm,performance,sorting,insertion-sort,Java,Algorithm,Performance,Sorting,Insertion Sort,我目前正在学习一门数据结构课程，正如您所料，我们必须做的一件事就是编写一些常见的排序。在编写我的插入排序算法时，我注意到In的运行速度明显快于我的讲师（对于400000个数据点，我的算法大约需要30秒，他的算法大约需要90秒）。我通过电子邮件向他发送了我的代码，当它们都在同一台机器上运行时，同样的结果发生了。我们设法花了40多分钟慢慢地把他的分类方法改成我的，直到一字不差地一模一样，除了一件看似武断的事情。首先，这是我的插入排序代码： public static int[] insertionS

我目前正在学习一门数据结构课程，正如您所料，我们必须做的一件事就是编写一些常见的排序算法。在编写我的插入排序算法时，我注意到它的运行速度明显快于我的讲师的版本（对于400000个数据点，我的算法大约需要30秒，他的算法大约需要90秒）。我通过电子邮件把我的代码发给了他，当两份代码在同一台机器上运行时，结果依然如此。我们花了40多分钟，一点一点地把他的排序方法改成我的，直到两者一字不差、完全相同，只剩下一处看似无关紧要的差别。首先，这是我的插入排序代码：

/**
 * Sorts the given array into ascending order, in place, using insertion sort.
 *
 * @param A the array to sort; must be non-null and contain at least one element
 * @return the same array instance {@code A}, after sorting
 * @throws IllegalArgumentException if {@code A} is null or has length 0
 */
public static int[] insertionSort(int[] A){

    //Check for illegal cases: a null or empty array is rejected outright
    if (A == null || A.length == 0){

        throw new IllegalArgumentException("A is not populated");

    }

    // Before each iteration the prefix A[0..i-1] is already sorted
    for(int i = 0; i < A.length; i++){

        int j = i;

        // Move A[i] leftwards until its left neighbour is no longer greater
        while(j > 0 && A[j - 1] > A[j]){

            // Swap A[j] and A[j - 1]; this variant saves A[j] first
            int temp = A[j];
            A[j] = A[j - 1];
            A[j - 1] = temp;

            j--;

        }

    }

    return A;

}

我们发现，交换元素的这三行代码就是罪魁祸首：正是因为这三行的写法不同，我的代码运行速度才明显更快。困惑之余，我们运行了

javap -c

来获取一个简单程序的字节码，这个程序只有一个

main

，其中包含一个数组声明，一个

int j

的变量声明，以及分别按我的写法和他的写法实现交换的3行代码。以下是我的交换方法的字节码：

    Compiled from "me.java"
public class me {
  public me();
    Code:
       0: aload_0
       1: invokespecial #1                  // Method java/lang/Object."<init>":()V
       4: return

  public static void main(java.lang.String[]);
    Code:
       0: sipush        10000
       3: newarray       int
       5: astore_1
       6: bipush        10
       8: istore_2
       9: aload_1
      10: iload_2
      11: iaload
      12: istore_3
      13: aload_1
      14: iload_2
      15: aload_1
      16: iload_2
      17: iconst_1
      18: isub
      19: iaload
      20: iastore
      21: aload_1
      22: iload_2
      23: iconst_1
      24: isub
      25: iload_3
      26: iastore
      27: return
}

它们作为两个不同JVM中的两个独立文件运行。

TL;DR：你的实验无效，有许多变量可能会影响结果。最好使用 Caliper 或 JMH 等微基准测试工具。我用这样的工具来检查

你的和你教授的差别可以忽略不计

实验：在我的实验中，我有745038个数据点。我创建了3个测试：你的版本、你的讲师的版本，以及作为JDK一部分的

Arrays.sort（）

根据结果，您的运行时间为：1419867.808 ns 您的指导老师是：1429798.824 ns

所以我们说的是0.01毫秒

讲师的版本只是各次运行之间的波动较小

JDK Arrays.sort（）慢得更多，为1779042.513纳秒，比您的慢了约0.36毫秒

下面是我用 Caliper 做微基准测试的代码

package net.trajano.caliper.test;

import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import com.google.caliper.BeforeExperiment;
import com.google.caliper.Benchmark;
import com.google.caliper.api.VmOptions;
import com.google.caliper.runner.CaliperMain;

/**
 * Caliper micro-benchmark comparing two hand-written insertion sorts against
 * {@link Arrays#sort(int[])}. The two insertion sorts differ only in the order of
 * the three statements that swap adjacent elements.
 *
 * <p>Each benchmark repetition sorts a fresh copy of the input. Sorting the shared
 * array in place would leave it sorted after the very first repetition, so every
 * later repetition (and every later benchmark) would only time the already-sorted
 * best case.
 */
@VmOptions("-XX:-TieredCompilation")
public class SortBenchmark {

    // Unsorted input, loaded once by setUp(); the benchmarks only ever sort copies of it.
    private static int[] data;

    /**
     * Insertion sort using the question author's swap order (saves {@code A[j]} first).
     *
     * @param A the array to sort in place; must be non-null and non-empty
     * @return the same array instance, sorted ascending
     * @throws IllegalArgumentException if {@code A} is null or has length 0
     */
    public static int[] insertionSort(final int[] A) {

        // Check for illegal cases
        if (A == null || A.length == 0) {
            throw new IllegalArgumentException("A is not populated");
        }

        for (int i = 0; i < A.length; i++) {

            int j = i;

            // Move A[i] leftwards until its left neighbour is no longer greater.
            while (j > 0 && A[j - 1] > A[j]) {

                // Author's variant: save the right-hand element, store right then left.
                final int temp = A[j];
                A[j] = A[j - 1];
                A[j - 1] = temp;

                j--;
            }
        }

        return A;
    }

    /**
     * Insertion sort using the instructor's swap order (saves {@code A[j - 1]} first).
     *
     * @param A the array to sort in place; must be non-null and non-empty
     * @return the same array instance, sorted ascending
     * @throws IllegalArgumentException if {@code A} is null or has length 0
     */
    public static int[] insertionSortInstructor(final int[] A) {

        // Check for illegal cases
        if (A == null || A.length == 0) {
            throw new IllegalArgumentException("A is not populated");
        }

        for (int i = 0; i < A.length; i++) {

            int j = i;

            // Move A[i] leftwards until its left neighbour is no longer greater.
            while (j > 0 && A[j - 1] > A[j]) {

                // Instructor's variant: save the left-hand element, store left then right.
                final int temp = A[j - 1];
                A[j - 1] = A[j];
                A[j] = temp;

                j--;
            }
        }

        return A;
    }

    /**
     * Loads the benchmark input by reading a binary file as a sequence of big-endian
     * 32-bit integers. Trailing bytes that do not form a complete int are dropped.
     *
     * @throws IOException if the file cannot be opened or read
     */
    @BeforeExperiment
    void setUp() throws IOException {
        try (final DataInputStream dis = new DataInputStream(
                Files.newInputStream(Paths.get("C:/Program Files/iTunes/iTunes.exe")))) {
            final List<Integer> list = new ArrayList<>();
            while (true) {
                try {
                    list.add(dis.readInt());
                } catch (final EOFException endOfInput) {
                    // readInt() signals end of input by throwing; this is the normal exit.
                    break;
                }
            }

            data = list.stream().mapToInt(i -> i).toArray();
            System.out.println("Data size = " + data.length);
        }
    }

    /**
     * Times the author's insertion sort on a fresh unsorted copy per repetition.
     *
     * @param reps number of repetitions requested by Caliper
     */
    @Benchmark
    public void insertionSort(final int reps) {
        for (int i = 0; i < reps; i++) {
            // The copy cost is paid equally by all three benchmarks.
            insertionSort(data.clone());
        }
    }

    /**
     * Times the instructor's insertion sort on a fresh unsorted copy per repetition.
     *
     * @param reps number of repetitions requested by Caliper
     */
    @Benchmark
    public void insertionSortInstructor(final int reps) {
        for (int i = 0; i < reps; i++) {
            insertionSortInstructor(data.clone());
        }
    }

    /**
     * Times {@link Arrays#sort(int[])} on a fresh unsorted copy per repetition.
     *
     * @param reps number of repetitions requested by Caliper
     */
    @Benchmark
    public void jdkSort(final int reps) {
        for (int i = 0; i < reps; i++) {
            Arrays.sort(data.clone());
        }
    }

    /**
     * Runs the benchmark through the Caliper command-line runner.
     *
     * @param args arguments forwarded to Caliper
     */
    public static void main(final String[] args) {
        CaliperMain.main(SortBenchmark.class, args);
    }
}

package net.trajano.caliper.test；
导入java.io.DataInputStream；
导入java.io.EOFException；
导入java.io.IOException；
导入java.nio.file.Files；
导入java.nio.file.path；
导入java.util.ArrayList；
导入java.util.array；
导入java.util.List；
实验前导入com.google.caliper.before；
导入com.google.caliper.Benchmark；
导入com.google.caliper.api.VmOptions；
导入com.google.caliper.runner.CaliperMain；
@VmOptions（“-XX:-分层编译”）
公共类分类标号{
公共静态int[]插入排序（最终int[]A）{
//检查违法案件
如果（A==null | | A.length==0）{
抛出新的IllegalArgumentException（“未填充A”）；
}
for（int i=0；i0&&A[j-1]>A[j]）{
最终内部温度=A[j-1]；
A[j-1]=A[j]；
A[j]=温度；
j--；
}
}
返回A；
}
公共静态int[]插入或构造函数（最终int[]A）{
//检查违法案件
如果（A==null | | A.length==0）{
抛出新的IllegalArgumentException（“未填充A”）；
}
for（int i=0；i0&&A[j-1]>A[j]）{
最终内部温度=A[j]；
A[j]=A[j-1]；
A[j-1]=温度；
j--；
}
}
返回A；
}
@实验前
void setUp（）引发IOException{
try（final DataInputStream dis=new DataInputStream(
Files.newInputStream（path.get（“C:/Program Files/iTunes/iTunes.exe”））{
最终列表=新的ArrayList（）；
while（true）{
试一试{
list.add（dis.readInt（））；
}捕获（最终EOFEException e）{
打破
}
}
data=list.stream（）.mapToInt（i->i.toArray（）；
System.out.println（“数据大小=“+Data.length”）；
}
}
//要排序的数据
私有静态int[]数据；
@基准
公共void insertionSort（最终int reps）{
对于（int i=0；i


题外话
老实说，JDK 的排序反而更慢，这个结果让我很惊讶，所以我查看了一下它的源码。JDK似乎会根据阈值使用三种排序算法（归并排序、小于286个元素时的快速排序，以及小于47个元素时的插入排序）
由于我的数据集一开始就相当大，所以首先使用的是归并排序，它具有O（n）的空间复杂度，也就是需要数组的第二个副本。因此，可能是额外的堆分配导致了额外的耗时。
这是循环展开优化与公共子表达式消除
共同作用的结果。根据数组访问指令的顺序，JIT在一种情况下可以消除冗余的加载，而在另一种情况下则不能
让我详细解释一下。在这两种情况下，JIT都会展开内部循环的4次迭代
例如，对于您的情况：
    while (j > 3) {
        if (A[j - 1] > A[j]) {
            int temp = A[j];
            A[j] = A[j - 1];
            A[j - 1] = temp;         \
        }                             A[j - 1] loaded immediately after store
        if (A[j - 2] > A[j - 1]) {   /
            int temp = A[j - 1];
            A[j - 1] = A[j - 2];
            A[j - 2] = temp;         \
        }                             A[j - 2] loaded immediately after store
        if (A[j - 3] > A[j - 2]) {   /
            int temp = A[j - 2];
            A[j - 2] = A[j - 3];
            A[j - 3] = temp;         \
        }                             A[j - 3] loaded immediately after store
        if (A[j - 4] > A[j - 3]) {   /
            int temp = A[j - 3];
            A[j - 3] = A[j - 4];
            A[j - 4] = temp;
        }
        j -= 4;
    }

然后JIT消除了冗余的数组加载，生成的汇编代码如下所示
0x0000000002d53a70: movslq %r11d,%r10
0x0000000002d53a73: lea    0x0(%rbp,%r10,4),%r10
0x0000000002d53a78: mov    0x10(%r10),%ebx    ; ebx = A[j]
0x0000000002d53a7c: mov    0xc(%r10),%r9d     ; r9d = A[j - 1]

0x0000000002d53a80: cmp    %ebx,%r9d          ; if (r9d > ebx) {
0x0000000002d53a83: jle    0x0000000002d539f3 
0x0000000002d53a89: mov    %r9d,0x10(%r10)    ;     A[j] = r9d
0x0000000002d53a8d: mov    %ebx,0xc(%r10)     ;     A[j - 1] = ebx
                                              ; }
0x0000000002d53a91: mov    0x8(%r10),%r9d     ; r9d = A[j - 2]

0x0000000002d53a95: cmp    %ebx,%r9d          ; if (r9d > ebx) {  
0x0000000002d53a98: jle    0x0000000002d539f3                     
0x0000000002d53a9e: mov    %r9d,0xc(%r10)     ;     A[j - 1] = r9d    
0x0000000002d53aa2: mov    %ebx,0x8(%r10)     ;     A[j - 2] = ebx
                                              ; }             
0x0000000002d53aa6: mov    0x4(%r10),%r9d     ; r9d = A[j - 3]    

0x0000000002d53aaa: cmp    %ebx,%r9d          ; if (r9d > ebx) {  
0x0000000002d53aad: jle    0x0000000002d539f3                     
0x0000000002d53ab3: mov    %r9d,0x8(%r10)     ;     A[j - 2] = r9d
0x0000000002d53ab7: mov    %ebx,0x4(%r10)     ;     A[j - 3] = ebx
                                              ; }                 
0x0000000002d53abb: mov    (%r10),%r8d        ; r8d = A[j - 4]

0x0000000002d53abe: cmp    %ebx,%r8d          ; if (r8d > ebx) {
0x0000000002d53ac1: jle    0x0000000002d539f3  
0x0000000002d53ac7: mov    %r8d,0x4(%r10)     ;     A[j - 3] = r8d
0x0000000002d53acb: mov    %ebx,(%r10)        ;     A[j - 4] = ebx
                                              ; }
0x0000000002d53ace: add    $0xfffffffc,%r11d  ; j -= 4
0x0000000002d53ad2: cmp    $0x3,%r11d         ; while (j > 3)
0x0000000002d53ad6: jg     0x0000000002d53a70

循环展开后，讲师的代码看起来会有所不同：
    while (j > 3) {
        if (A[j - 1] > A[j]) {
            int temp = A[j - 1];
            A[j - 1] = A[j];
            A[j] = temp;         <-- another store instruction between A[j - 1] access
        }
        if (A[j - 2] > A[j - 1]) {
            int temp = A[j - 2];
            A[j - 2] = A[j - 1];
            A[j - 1] = temp;
        }
        ...

请注意，如果在禁用循环展开优化的情况下运行JVM（-XX:LoopUnrollLimit=0），则两种写法的性能将相同
My insertion sort took 37680.0 milliseconds.
Other insertion sort took 86358.0 milliseconds.

package net.trajano.caliper.test;

import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import com.google.caliper.BeforeExperiment;
import com.google.caliper.Benchmark;
import com.google.caliper.api.VmOptions;
import com.google.caliper.runner.CaliperMain;

/**
 * Caliper micro-benchmark comparing two hand-written insertion sorts against
 * {@link Arrays#sort(int[])}. The two insertion sorts differ only in the order of
 * the three statements that swap adjacent elements.
 *
 * <p>Each benchmark repetition sorts a fresh copy of the input. Sorting the shared
 * array in place would leave it sorted after the very first repetition, so every
 * later repetition (and every later benchmark) would only time the already-sorted
 * best case.
 */
@VmOptions("-XX:-TieredCompilation")
public class SortBenchmark {

    // Unsorted input, loaded once by setUp(); the benchmarks only ever sort copies of it.
    private static int[] data;

    /**
     * Insertion sort using the question author's swap order (saves {@code A[j]} first).
     *
     * @param A the array to sort in place; must be non-null and non-empty
     * @return the same array instance, sorted ascending
     * @throws IllegalArgumentException if {@code A} is null or has length 0
     */
    public static int[] insertionSort(final int[] A) {

        // Check for illegal cases
        if (A == null || A.length == 0) {
            throw new IllegalArgumentException("A is not populated");
        }

        for (int i = 0; i < A.length; i++) {

            int j = i;

            // Move A[i] leftwards until its left neighbour is no longer greater.
            while (j > 0 && A[j - 1] > A[j]) {

                // Author's variant: save the right-hand element, store right then left.
                final int temp = A[j];
                A[j] = A[j - 1];
                A[j - 1] = temp;

                j--;
            }
        }

        return A;
    }

    /**
     * Insertion sort using the instructor's swap order (saves {@code A[j - 1]} first).
     *
     * @param A the array to sort in place; must be non-null and non-empty
     * @return the same array instance, sorted ascending
     * @throws IllegalArgumentException if {@code A} is null or has length 0
     */
    public static int[] insertionSortInstructor(final int[] A) {

        // Check for illegal cases
        if (A == null || A.length == 0) {
            throw new IllegalArgumentException("A is not populated");
        }

        for (int i = 0; i < A.length; i++) {

            int j = i;

            // Move A[i] leftwards until its left neighbour is no longer greater.
            while (j > 0 && A[j - 1] > A[j]) {

                // Instructor's variant: save the left-hand element, store left then right.
                final int temp = A[j - 1];
                A[j - 1] = A[j];
                A[j] = temp;

                j--;
            }
        }

        return A;
    }

    /**
     * Loads the benchmark input by reading a binary file as a sequence of big-endian
     * 32-bit integers. Trailing bytes that do not form a complete int are dropped.
     *
     * @throws IOException if the file cannot be opened or read
     */
    @BeforeExperiment
    void setUp() throws IOException {
        try (final DataInputStream dis = new DataInputStream(
                Files.newInputStream(Paths.get("C:/Program Files/iTunes/iTunes.exe")))) {
            final List<Integer> list = new ArrayList<>();
            while (true) {
                try {
                    list.add(dis.readInt());
                } catch (final EOFException endOfInput) {
                    // readInt() signals end of input by throwing; this is the normal exit.
                    break;
                }
            }

            data = list.stream().mapToInt(i -> i).toArray();
            System.out.println("Data size = " + data.length);
        }
    }

    /**
     * Times the author's insertion sort on a fresh unsorted copy per repetition.
     *
     * @param reps number of repetitions requested by Caliper
     */
    @Benchmark
    public void insertionSort(final int reps) {
        for (int i = 0; i < reps; i++) {
            // The copy cost is paid equally by all three benchmarks.
            insertionSort(data.clone());
        }
    }

    /**
     * Times the instructor's insertion sort on a fresh unsorted copy per repetition.
     *
     * @param reps number of repetitions requested by Caliper
     */
    @Benchmark
    public void insertionSortInstructor(final int reps) {
        for (int i = 0; i < reps; i++) {
            insertionSortInstructor(data.clone());
        }
    }

    /**
     * Times {@link Arrays#sort(int[])} on a fresh unsorted copy per repetition.
     *
     * @param reps number of repetitions requested by Caliper
     */
    @Benchmark
    public void jdkSort(final int reps) {
        for (int i = 0; i < reps; i++) {
            Arrays.sort(data.clone());
        }
    }

    /**
     * Runs the benchmark through the Caliper command-line runner.
     *
     * @param args arguments forwarded to Caliper
     */
    public static void main(final String[] args) {
        CaliperMain.main(SortBenchmark.class, args);
    }
}

    while (j > 3) {
        if (A[j - 1] > A[j]) {
            int temp = A[j];
            A[j] = A[j - 1];
            A[j - 1] = temp;         \
        }                             A[j - 1] loaded immediately after store
        if (A[j - 2] > A[j - 1]) {   /
            int temp = A[j - 1];
            A[j - 1] = A[j - 2];
            A[j - 2] = temp;         \
        }                             A[j - 2] loaded immediately after store
        if (A[j - 3] > A[j - 2]) {   /
            int temp = A[j - 2];
            A[j - 2] = A[j - 3];
            A[j - 3] = temp;         \
        }                             A[j - 3] loaded immediately after store
        if (A[j - 4] > A[j - 3]) {   /
            int temp = A[j - 3];
            A[j - 3] = A[j - 4];
            A[j - 4] = temp;
        }
        j -= 4;
    }

0x0000000002d53a70: movslq %r11d,%r10
0x0000000002d53a73: lea    0x0(%rbp,%r10,4),%r10
0x0000000002d53a78: mov    0x10(%r10),%ebx    ; ebx = A[j]
0x0000000002d53a7c: mov    0xc(%r10),%r9d     ; r9d = A[j - 1]

0x0000000002d53a80: cmp    %ebx,%r9d          ; if (r9d > ebx) {
0x0000000002d53a83: jle    0x0000000002d539f3 
0x0000000002d53a89: mov    %r9d,0x10(%r10)    ;     A[j] = r9d
0x0000000002d53a8d: mov    %ebx,0xc(%r10)     ;     A[j - 1] = ebx
                                              ; }
0x0000000002d53a91: mov    0x8(%r10),%r9d     ; r9d = A[j - 2]

0x0000000002d53a95: cmp    %ebx,%r9d          ; if (r9d > ebx) {  
0x0000000002d53a98: jle    0x0000000002d539f3                     
0x0000000002d53a9e: mov    %r9d,0xc(%r10)     ;     A[j - 1] = r9d    
0x0000000002d53aa2: mov    %ebx,0x8(%r10)     ;     A[j - 2] = ebx
                                              ; }             
0x0000000002d53aa6: mov    0x4(%r10),%r9d     ; r9d = A[j - 3]    

0x0000000002d53aaa: cmp    %ebx,%r9d          ; if (r9d > ebx) {  
0x0000000002d53aad: jle    0x0000000002d539f3                     
0x0000000002d53ab3: mov    %r9d,0x8(%r10)     ;     A[j - 2] = r9d
0x0000000002d53ab7: mov    %ebx,0x4(%r10)     ;     A[j - 3] = ebx
                                              ; }                 
0x0000000002d53abb: mov    (%r10),%r8d        ; r8d = A[j - 4]

0x0000000002d53abe: cmp    %ebx,%r8d          ; if (r8d > ebx) {
0x0000000002d53ac1: jle    0x0000000002d539f3  
0x0000000002d53ac7: mov    %r8d,0x4(%r10)     ;     A[j - 3] = r8d
0x0000000002d53acb: mov    %ebx,(%r10)        ;     A[j - 4] = ebx
                                              ; }
0x0000000002d53ace: add    $0xfffffffc,%r11d  ; j -= 4
0x0000000002d53ad2: cmp    $0x3,%r11d         ; while (j > 3)
0x0000000002d53ad6: jg     0x0000000002d53a70

    while (j > 3) {
        if (A[j - 1] > A[j]) {
            int temp = A[j - 1];
            A[j - 1] = A[j];
            A[j] = temp;         <-- another store instruction between A[j - 1] access
        }
        if (A[j - 2] > A[j - 1]) {
            int temp = A[j - 2];
            A[j - 2] = A[j - 1];
            A[j - 1] = temp;
        }
        ...

0x0000000002b53a00: cmp    %r8d,%r10d          ; if (r10d > r8d) {
0x0000000002b53a03: jle    0x0000000002b53973
0x0000000002b53a09: mov    %r8d,0xc(%rbx)      ;     A[j - 1] = r8d
0x0000000002b53a0d: mov    %r10d,0x10(%rbx)    ;     A[j] = r10d
                                               ; }
0x0000000002b53a11: mov    0xc(%rbx),%r10d     ; r10d = A[j - 1]
0x0000000002b53a15: mov    0x8(%rbx),%r9d      ; r9d = A[j - 2]

0x0000000002b53a19: cmp    %r10d,%r9d          ; if (r9d > r10d) {
0x0000000002b53a1c: jle    0x0000000002b53973
0x0000000002b53a22: mov    %r10d,0x8(%rbx)     ;     A[j - 2] = r10d
0x0000000002b53a26: mov    %r9d,0xc(%rbx)      ;     A[j - 1] = r9d    
                                               ; }
0x0000000002b53a2a: mov    0x8(%rbx),%r8d      ; r8d = A[j - 2]
0x0000000002b53a2e: mov    0x4(%rbx),%r10d     ; r10d = A[j - 3]