Java 8u40 Math.round()非常慢
我有一个用 Java 8 编写的相当简单的业余项目,它在其中一种运行模式下会大量重复调用 Math.round()。例如,其中一种模式通过 ExecutorService 创建 4 个线程并将 48 个 Runnable 任务排入队列,每个任务会把类似下面的代码块运行 2^31 次:
int3 = Math.round(float1 + float2);
int3 = Math.round(float1 * float2);
int3 = Math.round(float1 / float2);
实际代码并不完全是这样(其中涉及数组和嵌套循环),但大意如此。总之,在 Java 8u40 之前,类似上面的代码在 AMD A10-7700K 上大约 13 秒就能完整跑完 1030 亿次这样的代码块。换成 Java 8u40 后,同样的工作大约需要 260 秒。代码没有任何改动,仅仅是更新了 Java。
有没有人注意到 Math.round() 变慢了,尤其是在被反复调用的时候?这几乎就像是 JVM 以前会做某种优化,而现在不再做了。也许在 8u40 之前它使用了 SIMD,而现在没有。
编辑:我已完成第二次尝试MVCE。您可以在此处下载第一次尝试:
下面是第二次尝试。我的第一次尝试被从这篇文章中删除了,因为它被认为太长了,而且很容易被JVM进行死代码删除优化(这显然在8u40中很少发生)
导入java.util.concurrent.ExecutorService;
导入java.util.concurrent.Executors;
公共类MathRoundMVCE
{
静态总长度=0;
静态长总和=0;
静态浮点[]浮点4=新浮点[128];
静态浮点[]浮点5=新浮点[128];
静态int[]int6=新int[128];
静态int[]int7=新int[128];
静态int[]int8=新int[128];
静态长[]长数组=新长[480];
最终静态整数mil=1000000;
公共静态void main(字符串[]args)
{
initmain数组();
OmniCode omni=新的OmniCode();
grandtotal=omni.runloops()/mil;
System.out.println(“操作总数为”+sumtotal);
System.out.println(“总执行时间为“+grandtotal+”毫秒”);
}
公共静态长SIFTARAY(长[]larray)
{
长topnum=0;
长tempnum=0;
对于(短i=0;i0)
{
topnum+=tempnum;
}
}
topnum=topnum/Runtime.getRuntime().availableProcessors();
返回topnum;
}
公共静态void initmain数组()
{
int k=0;
做
{
float4[k]=(float)(Math.random()*12)+1f;
float5[k]=(float)(Math.random()*12)+1f;
int6[k]=0;
k++;
}
k<128;
}
}
类OmniCode扩展线程
{
挥发性长总时间=0;
最终国际标准=16777216;
最终内部预热=200000;
字节线程=0;
公共长运行循环()
{
此.setPriority(最小优先级);
线程=(字节)Runtime.getRuntime().availableProcessors();
ExecutorService executor=Executors.newFixedThreadPool(线程);
对于(短j=0;j<48;j++)
{
execute(新一轮floatToIntalternate(预热,(字节)j));
}
executor.shutdown();
而(!executor.isTerminated())
{
尝试
{
睡眠(100);
}
捕捉(中断异常e)
{
//无所事事
}
}
executor=Executors.newFixedThreadPool(线程);
对于(短j=0;j<48;j++)
{
execute(新的RoundFloatToIntAlternate(标准,(字节)j));
}
executor.shutdown();
而(!executor.isTerminated())
{
尝试
{
睡眠(100);
}
捕捉(中断异常e)
{
//无所事事
}
}
totaltime=MathRoundMVCE.siftarray(MathRoundMVCE.longarray);
执行者=空;
Runtime.getRuntime().gc();
返回总时间;
}
}
类RoundFloatToIntAlternate扩展了线程
{
int i=0;
int j=0;
int3=0;
int迭代次数=0;
字节线程=0;
公共RoundFloatToIntAlternate(整数周期,字节线程数)
{
迭代=周期;
螺纹=螺纹编号;
}
公开募捐
{
本条规定了优先权(9);
MathRoundMVCE.longarray[this.thread]=0;
mainloop();
blankloop();
}
公共空白循环()
{
j=0;
长定时器=0;
长totaltimer=0;
做
{
计时器=System.nanoTime();
i=0;
做
{
i++;
}
而(i<128);
totaltimer+=System.nanoTime()-计时器;
j++;
}
而(j<迭代次数);
MathRoundMVCE.longarray[this.thread]=totaltimer;
}
公共void mainloop()
{
j=0;
长定时器=0;
长totaltimer=0;
长localsum=0;
int[]int6=新的int[128];
int[]int7=新的int[128];
int[]int8=新的int[128];
做
{
计时器=System.nanoTime();
i=0;
做
{
int6[i]=Math.round(MathRoundMVCE.float4[i]+MathRoundMVCE.float5[i]);
int7[i]=Math.round(MathRoundMVCE.float4[i]*MathRoundMVCE.float5[i]);
int8[i]=Math.round(MathRoundMVCE.float4[i]/MathRoundMVCE.float5[i]);
i++;
}
而(i<128);
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Entry point and shared state for the Math.round() timing test.
 *
 * <p>The static arrays are read and written by the worker tasks
 * ({@code RoundFloatToIntAlternate}); {@code longarray} receives one net timing
 * value (nanoseconds) per task slot and {@code sumtotal} receives the checksum
 * of the rounded values so the work cannot be discarded as dead code.
 */
public class MathRoundMVCE
{
    static long grandtotal = 0;
    static long sumtotal = 0;
    static float[] float4 = new float[128];
    static float[] float5 = new float[128];
    static int[] int6 = new int[128];
    static int[] int7 = new int[128];
    static int[] int8 = new int[128];
    static long[] longarray = new long[480];
    final static int mil = 1000000;

    /**
     * Fills the input arrays, runs the warm-up and measured phases through
     * {@code OmniCode.runloops()} and prints the checksum and the elapsed time
     * converted from nanoseconds to milliseconds.
     */
    public static void main(String[] args)
    {
        initmainarrays();
        OmniCode omni = new OmniCode();
        grandtotal = omni.runloops() / mil;
        System.out.println("Total sum of operations is " + sumtotal);
        System.out.println("Total execution time is " + grandtotal + " milliseconds");
    }

    /**
     * Sums every strictly positive entry of {@code larray} and divides the sum
     * by the number of available processors.
     *
     * @param larray per-task timing values; zero and negative entries are ignored
     * @return the positive total divided by {@code availableProcessors()}
     */
    public static long siftarray(long[] larray)
    {
        long topnum = 0;
        // Enhanced for instead of a short index: a short counter wraps to a
        // negative value (and then throws) once the array exceeds 32767 entries.
        for (long tempnum : larray)
        {
            if (tempnum > 0)
            {
                topnum += tempnum;
            }
        }
        return topnum / Runtime.getRuntime().availableProcessors();
    }

    /**
     * Fills {@code float4} and {@code float5} with random values in [1, 13)
     * and resets {@code int6} to zero.
     */
    public static void initmainarrays()
    {
        for (int k = 0; k < float4.length; k++)
        {
            float4[k] = (float)(Math.random() * 12) + 1f;
            float5[k] = (float)(Math.random() * 12) + 1f;
            int6[k] = 0;
        }
    }
}
/**
 * Drives the benchmark: runs a warm-up batch and then a measured batch of 48
 * {@code RoundFloatToIntAlternate} tasks on a fixed thread pool sized to the
 * number of available processors, then aggregates the per-task timings.
 */
class OmniCode extends Thread
{
    volatile long totaltime = 0;
    final int standard = 16777216;
    final int warmup = 200000;
    byte threads = 0;

    /**
     * Runs the warm-up and measured phases and returns the aggregated time.
     *
     * <p>If the calling thread is interrupted while waiting, the current pool is
     * shut down immediately, the interrupt flag is restored, and the value
     * returned reflects only the timings recorded so far.
     *
     * @return the result of {@code MathRoundMVCE.siftarray} over the timing array
     */
    public long runloops()
    {
        // Kept from the original: this changes the priority of this OmniCode
        // Thread object. main() calls runloops() directly without start(), so
        // the calling thread's priority is not affected.
        this.setPriority(MIN_PRIORITY);
        // Clamp before narrowing: a plain (byte) cast turns 128+ processors
        // into a negative pool size, which newFixedThreadPool rejects.
        threads = (byte) Math.min(Runtime.getRuntime().availableProcessors(), Byte.MAX_VALUE);

        runBatch(warmup);
        runBatch(standard);

        totaltime = MathRoundMVCE.siftarray(MathRoundMVCE.longarray);
        Runtime.getRuntime().gc();
        return totaltime;
    }

    /** Submits 48 tasks of {@code iterations} cycles each and waits for them to finish. */
    private void runBatch(int iterations)
    {
        ExecutorService executor = Executors.newFixedThreadPool(threads);
        for (short j = 0; j < 48; j++)
        {
            executor.execute(new RoundFloatToIntAlternate(iterations, (byte) j));
        }
        executor.shutdown();
        try
        {
            // Block until all tasks finish instead of polling with sleep(100).
            executor.awaitTermination(Long.MAX_VALUE, java.util.concurrent.TimeUnit.NANOSECONDS);
        }
        catch (InterruptedException e)
        {
            // Stop accepting queued work and propagate the interrupt to the caller.
            executor.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }
}
/**
 * One benchmark task. run() times a loop of Math.round() calls (mainloop) and
 * then times an empty loop of the same shape (blankloop); the difference is
 * left in MathRoundMVCE.longarray at this task's slot.
 */
class RoundFloatToIntAlternate extends Thread
{
// Inner and outer loop counters; kept as instance fields and shared by both loops.
int i = 0;
int j = 0;
// Not referenced anywhere in this class.
int int3 = 0;
// Number of outer-loop passes to execute.
int iterations = 0;
// Index of this task's slot in MathRoundMVCE.longarray.
byte thread = 0;
/**
 * @param cycles number of outer-loop passes for both loops
 * @param threadnumber slot in MathRoundMVCE.longarray that receives the timing
 */
public RoundFloatToIntAlternate(int cycles, byte threadnumber)
{
iterations = cycles;
thread = threadnumber;
}
/** Clears this task's timing slot, then runs the measured loop followed by the empty loop. */
public void run()
{
// NOTE(review): OmniCode passes instances to ExecutorService.execute(), so this
// sets the priority of this Thread object, not of the pool worker running run() - confirm intent.
this.setPriority(9);
MathRoundMVCE.longarray[this.thread] = 0;
mainloop();
blankloop();
}
/**
 * Times an inner loop that only increments i up to 128, repeated
 * `iterations` times, and subtracts the accumulated nanoseconds from this
 * task's slot in MathRoundMVCE.longarray.
 */
public void blankloop()
{
j = 0;
long timer = 0;
long totaltimer = 0;
do
{
timer = System.nanoTime();
i = 0;
do
{
i++;
}
while (i < 128);
totaltimer += System.nanoTime() - timer;
j++;
}
while (j < iterations);
MathRoundMVCE.longarray[this.thread] -= totaltimer;
}
/**
 * Times 128 x 3 Math.round() calls (sum, product and quotient of the shared
 * float arrays) per pass, repeated `iterations` times. The accumulated
 * nanoseconds are added to this task's slot in MathRoundMVCE.longarray and the
 * checksum of all rounded values is stored in MathRoundMVCE.sumtotal.
 */
public void mainloop()
{
j = 0;
long timer = 0;
long totaltimer = 0;
long localsum = 0;
// Local result arrays; these shadow the static arrays of the same name in MathRoundMVCE.
int[] int6 = new int[128];
int[] int7 = new int[128];
int[] int8 = new int[128];
do
{
timer = System.nanoTime();
i = 0;
do
{
int6[i] = Math.round(MathRoundMVCE.float4[i] + MathRoundMVCE.float5[i]);
int7[i] = Math.round(MathRoundMVCE.float4[i] * MathRoundMVCE.float5[i]);
int8[i] = Math.round(MathRoundMVCE.float4[i] / MathRoundMVCE.float5[i]);
i++;
}
while (i < 128);
totaltimer += System.nanoTime() - timer;
// Summation happens outside the timed region; it consumes the results of the rounding loop.
for(short z = 0; z < 128; z++)
{
localsum += int6[z] + int7[z] + int8[z];
}
j++;
}
while (j < iterations);
MathRoundMVCE.longarray[this.thread] += totaltimer;
// Plain assignment: each task overwrites the shared value rather than adding to it.
MathRoundMVCE.sumtotal = localsum;
}
}
/**
 * Single-threaded timing test for Math.round(float): fills two 8x16 float
 * tables and times 1e7 passes of 48 hand-unrolled Math.round() calls, once
 * labelled "warmup" and once labelled "real".
 */
public class MathTime {
static float[][] float1 = new float[8][16];
static float[][] float2 = new float[8][16];
/** Initialises both tables so that element [j][k] holds j + k, then runs the test. */
public static void main(String[] args) {
for (int j = 0; j < 8; j++) {
for (int k = 0; k < 16; k++) {
float1[j][k] = (float) (j + k);
float2[j][k] = (float) (j + k);
}
}
new Test().run();
}
/** Holds the accumulator and the timed loop. */
private static class Test {
// Running sum of every rounded value; printed after each pass.
int int3;
/**
 * Runs the timed loop twice and prints, after each pass, the accumulator
 * followed by the Java version, the pass label and the elapsed milliseconds.
 */
public void run() {
for (String test : new String[] { "warmup", "real" }) {
long t0 = System.nanoTime();
// The bound is the double literal 1e7, so the int counter is compared as a double.
for (int count = 0; count < 1e7; count++) {
// Row selector cycles through 0..7.
int i = count % 8;
// 16 additions
int3 += Math.round(float1[i][0] + float2[i][0]);
int3 += Math.round(float1[i][1] + float2[i][1]);
int3 += Math.round(float1[i][2] + float2[i][2]);
int3 += Math.round(float1[i][3] + float2[i][3]);
int3 += Math.round(float1[i][4] + float2[i][4]);
int3 += Math.round(float1[i][5] + float2[i][5]);
int3 += Math.round(float1[i][6] + float2[i][6]);
int3 += Math.round(float1[i][7] + float2[i][7]);
int3 += Math.round(float1[i][8] + float2[i][8]);
int3 += Math.round(float1[i][9] + float2[i][9]);
int3 += Math.round(float1[i][10] + float2[i][10]);
int3 += Math.round(float1[i][11] + float2[i][11]);
int3 += Math.round(float1[i][12] + float2[i][12]);
int3 += Math.round(float1[i][13] + float2[i][13]);
int3 += Math.round(float1[i][14] + float2[i][14]);
int3 += Math.round(float1[i][15] + float2[i][15]);
// 16 multiplications
int3 += Math.round(float1[i][0] * float2[i][0]);
int3 += Math.round(float1[i][1] * float2[i][1]);
int3 += Math.round(float1[i][2] * float2[i][2]);
int3 += Math.round(float1[i][3] * float2[i][3]);
int3 += Math.round(float1[i][4] * float2[i][4]);
int3 += Math.round(float1[i][5] * float2[i][5]);
int3 += Math.round(float1[i][6] * float2[i][6]);
int3 += Math.round(float1[i][7] * float2[i][7]);
int3 += Math.round(float1[i][8] * float2[i][8]);
int3 += Math.round(float1[i][9] * float2[i][9]);
int3 += Math.round(float1[i][10] * float2[i][10]);
int3 += Math.round(float1[i][11] * float2[i][11]);
int3 += Math.round(float1[i][12] * float2[i][12]);
int3 += Math.round(float1[i][13] * float2[i][13]);
int3 += Math.round(float1[i][14] * float2[i][14]);
int3 += Math.round(float1[i][15] * float2[i][15]);
// 16 divisions; both tables hold 0 at [0][0], so the first one is 0f / 0f when i == 0.
int3 += Math.round(float1[i][0] / float2[i][0]);
int3 += Math.round(float1[i][1] / float2[i][1]);
int3 += Math.round(float1[i][2] / float2[i][2]);
int3 += Math.round(float1[i][3] / float2[i][3]);
int3 += Math.round(float1[i][4] / float2[i][4]);
int3 += Math.round(float1[i][5] / float2[i][5]);
int3 += Math.round(float1[i][6] / float2[i][6]);
int3 += Math.round(float1[i][7] / float2[i][7]);
int3 += Math.round(float1[i][8] / float2[i][8]);
int3 += Math.round(float1[i][9] / float2[i][9]);
int3 += Math.round(float1[i][10] / float2[i][10]);
int3 += Math.round(float1[i][11] / float2[i][11]);
int3 += Math.round(float1[i][12] / float2[i][12]);
int3 += Math.round(float1[i][13] / float2[i][13]);
int3 += Math.round(float1[i][14] / float2[i][14]);
int3 += Math.round(float1[i][15] / float2[i][15]);
}
long t1 = System.nanoTime();
// Printing the accumulator makes the rounded values observable (presumably to keep the loop from being eliminated).
System.out.println(int3);
System.out.println(String.format("%s, Math.round(float), %s, %.1f ms", System.getProperty("java.version"), test, (t1 - t0) / 1e6));
}
}
}
}
adam@brimstone:~$ ./jdk1.8.0_40/bin/javac MathTime.java;./jdk1.8.0_40/bin/java -cp . MathTime
1.8.0_40, Math.round(float), warmup, 6846.4 ms
1.8.0_40, Math.round(float), real, 6058.6 ms
adam@brimstone:~$ ./jdk1.8.0_31/bin/javac MathTime.java;./jdk1.8.0_31/bin/java -cp . MathTime
1.8.0_31, Math.round(float), warmup, 5717.9 ms
1.8.0_31, Math.round(float), real, 5282.7 ms
adam@brimstone:~$ ./jdk1.8.0_25/bin/javac MathTime.java;./jdk1.8.0_25/bin/java -cp . MathTime
1.8.0_25, Math.round(float), warmup, 5702.4 ms
1.8.0_25, Math.round(float), real, 5262.2 ms
diff jdk1.8.0_31/src/java/lang/Math.java jdk1.8.0_40/src/java/lang/Math.java
-no differences-
/**
 * Rounds a float to an int using integer operations on the raw IEEE 754 bits.
 *
 * For values whose exponent puts the shift in [0, 32), the significand (with
 * its implicit leading bit restored and the sign applied) is shifted so that
 * one fractional bit remains, 1 is added, and the last bit is dropped, which
 * yields floor(a + 1/2). All other inputs fall through to a plain (int) cast.
 *
 * @param a the value to round
 * @return floor(a + 1/2) on the bit-manipulation path, otherwise (int) a
 */
public static int round(float a) {
int intBits = Float.floatToRawIntBits(a);
// Extract the biased exponent field.
int biasedExp = (intBits & FloatConsts.EXP_BIT_MASK)
>> (FloatConsts.SIGNIFICAND_WIDTH - 1);
// Right-shift that leaves exactly one fractional bit in the significand.
int shift = (FloatConsts.SIGNIFICAND_WIDTH - 2
+ FloatConsts.EXP_BIAS) - biasedExp;
if ((shift & -32) == 0) { // shift >= 0 && shift < 32
// a is a finite number such that pow(2,-32) <= ulp(a) < 1
int r = ((intBits & FloatConsts.SIGNIF_BIT_MASK)
| (FloatConsts.SIGNIF_BIT_MASK + 1));
if (intBits < 0) {
r = -r;
}
// In the comments below each Java expression evaluates to the value
// the corresponding mathematical expression:
// (r) evaluates to a / ulp(a)
// (r >> shift) evaluates to floor(a * 2)
// ((r >> shift) + 1) evaluates to floor((a + 1/2) * 2)
// (((r >> shift) + 1) >> 1) evaluates to floor(a + 1/2)
return ((r >> shift) + 1) >> 1;
} else {
// a is either
// - a finite number with abs(a) < exp(2,FloatConsts.SIGNIFICAND_WIDTH-32) < 1/2
// - a finite number with ulp(a) >= 1 and hence a is a mathematical integer
// - an infinity or NaN
return (int) a;
}
}
// Minimal timing fragment: sums Math.round(i / (i + 1)) for i in [0, 1e9) and
// reports the wall-clock time in milliseconds.
int result = 0;
long t0 = System.currentTimeMillis();
// The bound is the double literal 1e9, so the int counter is compared as a double.
for (int i = 0; i < 1e9; i++) {
result += Math.round((float) i / (float) (i + 1));
}
long t1 = System.currentTimeMillis();
// Printing the sum makes the loop's result observable.
System.out.println("result = " + result);
// Dividing by 1f converts the long difference to a float for the %.1f format.
System.out.println(String.format("%s, Math.round(float), %.1f ms", System.getProperty("java.version"), (t1 - t0)/1f));
result = 999999999
1.8.0_25, Math.round(float), 5251.0 ms
result = 999999999
1.8.0_40, Math.round(float), 3903.0 ms
It took 401772 milliseconds to complete edu.jvm.runtime.RoundFloatToInt. <==== 1.8.0_40
It took 410767 milliseconds to complete edu.jvm.runtime.RoundFloatToInt. <==== 1.8.0_25
package org.openjdk.jmh.samples;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.VerboseMode;
import java.util.Random;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark comparing a plain array read ({@link #baseline()}) with the
 * same read followed by {@code Math.round(float)} ({@link #round()}).
 *
 * <p>Each invocation advances a cursor through an array of pseudo-random
 * floats in [0, 1) generated from a fixed seed, so consecutive invocations see
 * different inputs.
 */
@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
public class RoundBench {
    /** Array length; a power of two so the cursor can wrap with a single AND. */
    private static final int SIZE = 8192;
    private static final int INDEX_MASK = SIZE - 1;

    float[] floats;
    int i;

    /** Fills the input array with reproducible pseudo-random floats. */
    @Setup
    public void initI() {
        Random random = new Random(0xDEAD_BEEF);
        floats = new float[SIZE];
        for (int i = 0; i < floats.length; i++) {
            floats[i] = random.nextFloat();
        }
    }

    /**
     * Cost of advancing the cursor and reading one element.
     *
     * @return the element at the new cursor position
     */
    @Benchmark
    public float baseline() {
        // The previous "i++; i &= 0xFFFFFF00" always produced 0, so only
        // floats[0] was ever read; wrap within the array instead.
        i = (i + 1) & INDEX_MASK;
        return floats[i];
    }

    /**
     * Same access pattern as {@link #baseline()} plus the rounding call.
     *
     * @return the rounded element at the new cursor position
     */
    @Benchmark
    public int round() {
        i = (i + 1) & INDEX_MASK;
        return Math.round(floats[i]);
    }

    /** Runs every benchmark in this class with the default JMH options. */
    public static void main(String[] args) throws RunnerException {
        Options options = new OptionsBuilder()
                .include(RoundBench.class.getName())
                .build();
        new Runner(options).run();
    }
}
1.8.0_25
Benchmark Mode Cnt Score Error Units
RoundBench.baseline avgt 6 2.565 ± 0.028 ns/op
RoundBench.round avgt 6 4.459 ± 0.065 ns/op
1.8.0_40
Benchmark Mode Cnt Score Error Units
RoundBench.baseline avgt 6 2.589 ± 0.045 ns/op
RoundBench.round avgt 6 4.588 ± 0.182 ns/op
/**
 * Times Math.round(float) over a growing number of passes and prints one
 * result line per run size.
 */
public class MathRoundPerformance {
    static final int size = 16;
    static float[] data = new float[size];

    /**
     * Stores each index as the value of its own slot in {@code data}, then
     * measures {@link #runTest(int)} for run sizes from 1,000,000 up to
     * 100,000,000 in steps of 5,000,000.
     */
    public static void main(String[] args) {
        int slot = 0;
        while (slot < size) {
            data[slot] = slot;
            slot++;
        }

        int n = 1000000;
        while (n <= 100000000) {
            long started = System.nanoTime();
            int checksum = runTest(n);
            long finished = System.nanoTime();
            double elapsedMs = (finished - started) / 1e6;
            System.out.printf(
                    "%s, Math.round(float), %s, %s, %.1f ms\n",
                    System.getProperty("java.version"),
                    n, checksum, elapsedMs);
            n += 5000000;
        }
    }

    /**
     * Accumulates the rounded sum, product and quotient of each pair of
     * neighbouring {@code data} elements, cycling through the array.
     *
     * @param n number of passes; nothing is accumulated when {@code n <= 0}
     * @return the (possibly wrapped) int total of all rounded values
     */
    public static int runTest(int n) {
        int accumulator = 0;
        int step = 0;
        while (step < n) {
            final float left = data[step % size];
            final float right = data[(step + 1) % size];
            accumulator += Math.round(left + right);
            accumulator += Math.round(left * right);
            accumulator += Math.round(left / right);
            step++;
        }
        return accumulator;
    }
}
...
1.8.0_31, Math.round(float), 96000000, -351934592, 504,8 ms
....
1.8.0_40, Math.round(float), 96000000, -351934592, 544,0 ms
java -server -XX:+UnlockDiagnosticVMOptions -XX:+TraceClassLoading
-XX:+LogCompilation -XX:+PrintInlining -XX:+PrintAssembly
MathRoundPerformance