Java 低效的.Net fork/join
我已经使用Java fork/join pool和.Net parallel.Invoke实现了一个并行mergesort,我观察到两个版本之间在性能上的差异:5秒(Java)和45秒(.Net)对2GB文件进行排序。这几乎是数量级的差异 以下是.Net代码:Java 低效的.Net fork/join,java,.net,multithreading,Java,.net,Multithreading,我已经使用Java fork/join pool和.Net parallel.Invoke实现了一个并行mergesort,我观察到两个版本之间在性能上的差异:5秒(Java)和45秒(.Net)对2GB文件进行排序。这几乎是数量级的差异 以下是.Net代码: public void ParallelSort() { Start = DateTime.Now.Ticks; int lo = 0; int hi = values.L
public void ParallelSort()
{
Start = DateTime.Now.Ticks;
int lo = 0;
int hi = values.Length - 1;
int mid = values.Length / 2;
int maxDepth = Environment.ProcessorCount;
Sort(lo, mid, hi, maxDepth);
End = DateTime.Now.Ticks;
}
private void Sort(int lo, int mid, int hi, int maxDepth)
{
if (hi - lo < 16)
InsertionSort.Sort(values, lo, hi);
else if (maxDepth > 0)
{
Parallel.Invoke(
() => Sort(lo, (mid + lo)/2, mid, maxDepth-1),
() => Sort(mid + 1, (hi + mid)/2, hi, maxDepth-1));
Merge(lo, mid, hi);
}
else
{
Sort(lo, (mid + lo)/2, mid, 0);
Sort(mid + 1, (hi + mid)/2, hi, 0);
Merge(lo, mid, hi);
}
}
public void ParallelBottomUpSort()
{
for (int sz = 1; sz < values.Length; sz *= 2)
Parallel.For(0, (int) Math.Ceiling((double) values.Length/(2*sz)), i =>
{
int lo = 2 * sz * i;
int hi = Math.Min(lo + 2 * sz, values.Length) - 1;
int mid = lo + sz - 1;
Merge(lo, mid, hi);
});
}
更新1:
有趣的是,一个自下而上的实现显示了相反的结果:.Net的Parallel.For()和Java的ExecutorService.InvokeAll():4秒对193秒对256Mb文件进行排序。不知道,也许我做错了什么。
Java代码:
public void parallelBottomUpSort() throws Exception
{
ExecutorService p = Executors.newFixedThreadPool(8);
for (int sz = 1; sz < values.length; sz *= 2)
{
int N = (int) Math.ceil((double) values.length / (2 * sz));
System.out.println(N);
List<MergeSort> ms = new ArrayList<>(N);
for (int i = 0; i < N; i++)
{
int lo = 2 * sz * i;
int hi = Math.min(lo + 2 * sz, values.length) - 1;
int mid = lo + sz - 1;
ms.add(new MergeSort(values, aux, lo, mid, hi));
}
p.invokeAll(ms);
}
p.shutdown();
}
public void parallelboothuport()引发异常
{
ExecutorService p=Executors.newFixedThreadPool(8);
对于(int sz=1;sz
.Net代码:
public void ParallelSort()
{
Start = DateTime.Now.Ticks;
int lo = 0;
int hi = values.Length - 1;
int mid = values.Length / 2;
int maxDepth = Environment.ProcessorCount;
Sort(lo, mid, hi, maxDepth);
End = DateTime.Now.Ticks;
}
private void Sort(int lo, int mid, int hi, int maxDepth)
{
if (hi - lo < 16)
InsertionSort.Sort(values, lo, hi);
else if (maxDepth > 0)
{
Parallel.Invoke(
() => Sort(lo, (mid + lo)/2, mid, maxDepth-1),
() => Sort(mid + 1, (hi + mid)/2, hi, maxDepth-1));
Merge(lo, mid, hi);
}
else
{
Sort(lo, (mid + lo)/2, mid, 0);
Sort(mid + 1, (hi + mid)/2, hi, 0);
Merge(lo, mid, hi);
}
}
public void ParallelBottomUpSort()
{
for (int sz = 1; sz < values.Length; sz *= 2)
Parallel.For(0, (int) Math.Ceiling((double) values.Length/(2*sz)), i =>
{
int lo = 2 * sz * i;
int hi = Math.Min(lo + 2 * sz, values.Length) - 1;
int mid = lo + sz - 1;
Merge(lo, mid, hi);
});
}
public void parallellbottomuport()
{
对于(int sz=1;sz
{
int-lo=2*sz*i;
inthi=Math.Min(lo+2*sz,values.Length)-1;
int mid=lo+sz-1;
合并(低、中、高);
});
}
看起来.Net的并行引擎通常更稳定。Java的并发包有其最坏和最好的情况
更新2:可编译源
class Sort
{
private int[] values, aux;
public Sort(int[] values)
{
this.values = values;
this.aux = new int[values.Length];
}
private unsafe void Merge(int lo, int mid, int hi)
{
if (lo == hi)
return;
if (mid + 1 <= hi && values[mid] <= values[mid + 1])
return;
Buffer.BlockCopy(values, sizeof(int) * lo, aux, sizeof(int) * lo, sizeof(int) * (hi - lo + 1));
int i = lo;
int j = mid + 1;
fixed (int* a = values, b = aux)
{
for (int k = lo; k <= hi; k++)
{
if (i > mid)
a[k] = b[j++];
else if (j > hi)
a[k] = b[i++];
else if (b[i] < b[j])
a[k] = b[i++];
else
a[k] = b[j++];
}
}
}
public void ParallelBottomUpSort()
{
for (int sz = 1; sz < values.Length; sz *= 2)
Parallel.For(0, (int)Math.Ceiling((double)values.Length / (2 * sz)), i =>
{
int lo = 2 * sz * i;
int hi = Math.Min(lo + 2 * sz, values.Length) - 1;
int mid = lo + sz - 1;
Merge(lo, mid, hi);
});
}
public void ParallelTopDownSort()
{
int lo = 0;
int hi = values.Length - 1;
int mid = values.Length / 2;
int maxDepth = (int)Math.Log(Environment.ProcessorCount, 2);
ParallelTopDownSort(lo, mid, hi, maxDepth);
}
private void ParallelTopDownSort(int lo, int mid, int hi, int maxDepth)
{
if (hi - lo < 16)
InsertionSort.Sort(values, lo, hi);
else if (maxDepth > 0)
{
Parallel.Invoke(
() => ParallelTopDownSort(lo, (mid + lo) / 2, mid, maxDepth - 1),
() => ParallelTopDownSort(mid + 1, (hi + mid) / 2, hi, maxDepth - 1));
Merge(lo, mid, hi);
}
else
{
ParallelTopDownSort(lo, (mid + lo) / 2, mid, 0);
ParallelTopDownSort(mid + 1, (hi + mid) / 2, hi, 0);
Merge(lo, mid, hi);
}
}
static unsafe void Main(string[] args)
{
// path to file to be sorted
String path = args[0];
Console.WriteLine("Reading file");
int[] values;
// Loading file
var loadStart = DateTime.Now.Ticks;
long intCount = (new FileInfo(path)).Length / 4;
values = new int[intCount];
MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(path);
MemoryMappedViewAccessor mma = mmf.CreateViewAccessor();
byte* ptr = (byte*) 0;
mma.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
Marshal.Copy(new IntPtr(ptr), values, 0, (int) intCount);
var loadEnd = DateTime.Now.Ticks;
Console.WriteLine("Loaded in " + (loadEnd - loadStart) / (10000) + " ms");
// Sorting
Console.WriteLine("Sorting " + values.Length + " ints");
Sort ms = new Sort(values);
var start = DateTime.Now.Ticks;
ms.ParallelBottomUpSort();
var end = DateTime.Now.Ticks;
Console.WriteLine("Done sorting");
Console.WriteLine((end - start) / (1000 * 10000));
}
}
类排序
{
私有int[]值,aux;
公共排序(int[]值)
{
这个值=值;
this.aux=新的int[values.Length];
}
私有不安全无效合并(内部低、内部中、内部高)
{
如果(低==高)
返回;
如果(中间+1
{
int-lo=2*sz*i;
inthi=Math.Min(lo+2*sz,values.Length)-1;
int mid=lo+sz-1;
合并(低、中、高);
});
}
public void ParallelTopDownSort()
{
int-lo=0;
int hi=值。长度-1;
int mid=数值。长度/2;
intmaxDepth=(int)Math.Log(Environment.ProcessorCount,2);
ParallelTopDownSort(低、中、高、最大深度);
}
私有void ParallelTopDownSort(int-lo、int-mid、int-hi、int-maxDepth)
{
如果(高-低<16)
InsertionSort.Sort(值、lo、hi);
否则如果(最大深度>0)
{
并行调用(
()=>ParallelTopDownSort(lo,(mid+lo)/2,mid,maxDepth-1),
()=>ParallelTopDownSort(mid+1,(hi+mid)/2,hi,maxDepth-1);
合并(低、中、高);
}
其他的
{
平行顶向下排序(低,(中+低)/2,中,0);
平行顶向下排序(中+1,(高+中)/2,高,0);
合并(低、中、高);
}
}
静态不安全void Main(字符串[]args)
{
//要排序的文件的路径
字符串路径=args[0];
Console.WriteLine(“读取文件”);
int[]值;
//加载文件
var loadStart=DateTime.Now.Ticks;
long intCount=(新文件信息(路径)).Length/4;
值=新整数[intCount];
MemoryMappedFile mmf=MemoryMappedFile.CreateFromFile(路径);
MemoryMappedViewAccessor mma=mmf.CreateViewAccessor();
字节*ptr=(字节*)0;
mma.SafeMemoryMappedViewHandle.AcquirePointer(参考ptr);
Marshal.Copy(新的IntPtr(ptr),值0,(int)intCount);
var loadEnd=DateTime.Now.Ticks;
Console.WriteLine(“装入”+(loadEnd-loadStart)/(10000)+“ms”);
//分类
Console.WriteLine(“排序”+values.Length+ints”);
排序ms=新排序(值);
var start=DateTime.Now.Ticks;
Parallelbottomuport女士();
var end=DateTime.Now.Ticks;
控制台写入线(“完成排序”);
控制台写入线((结束-开始)/(1000*10000));
}
}
在任何文件共享网站上发布图像(如果您觉得这样会更容易理解),并在此处发布链接,我将为您更新问题。不要使用DateTime。现在使用来衡量性能。试试秒表。另外,请提供Java版本-没有它,很难比较任何东西。到并发可视化工具映像的公共链接,请随意在这里发布:感谢Java代码。请使用秒表
重新验证您的结果好吗?您使用的处理器数量为最大深度
,您不应该使用处理器数量的2-log吗?