Java 低效的.Net fork/join

Java 低效的.Net fork/join,java,.net,multithreading,Java,.net,Multithreading,我已经使用Java fork/join pool和.Net parallel.Invoke实现了一个并行mergesort,我观察到两个版本之间在性能上的差异:5秒(Java)和45秒(.Net)对2GB文件进行排序。这几乎是数量级的差异 以下是.Net代码: public void ParallelSort() { Start = DateTime.Now.Ticks; int lo = 0; int hi = values.L

我已经使用Java fork/join pool和.Net parallel.Invoke实现了一个并行mergesort,我观察到两个版本之间在性能上的差异:5秒(Java)和45秒(.Net)对2GB文件进行排序。这几乎是数量级的差异

以下是.Net代码:

    public void ParallelSort()
    {
        Start = DateTime.Now.Ticks;

        int lo = 0;
        int hi = values.Length - 1;
        int mid = values.Length / 2;
        int maxDepth = Environment.ProcessorCount;
        Sort(lo, mid, hi, maxDepth);

        End = DateTime.Now.Ticks;
    }
    private void Sort(int lo, int mid, int hi, int maxDepth)
    {
       if (hi - lo < 16)
            InsertionSort.Sort(values, lo, hi);
        else if (maxDepth > 0)
        {
            Parallel.Invoke(
                () => Sort(lo, (mid + lo)/2, mid, maxDepth-1),
                () => Sort(mid + 1, (hi + mid)/2, hi, maxDepth-1));
            Merge(lo, mid, hi);

        }
        else
        {
            Sort(lo, (mid + lo)/2, mid, 0);
            Sort(mid + 1, (hi + mid)/2, hi, 0);
            Merge(lo, mid, hi);
        }
    }
 public void ParallelBottomUpSort()
    {

        for (int sz = 1; sz < values.Length; sz *= 2)
            Parallel.For(0, (int) Math.Ceiling((double) values.Length/(2*sz)), i =>
            {
                int lo = 2 * sz * i;
                int hi = Math.Min(lo + 2 * sz, values.Length) - 1;
                int mid = lo + sz - 1;
                Merge(lo, mid, hi);
            });


    }   
更新1: 有趣的是,一个自下而上的实现显示了相反的结果:.Net的Parallel.For()和Java的ExecutorService.InvokeAll():4秒对193秒对256Mb文件进行排序。不知道,也许我做错了什么。 Java代码:

public void parallelBottomUpSort() throws Exception
{
    ExecutorService p = Executors.newFixedThreadPool(8);


    for (int sz = 1; sz < values.length; sz *= 2)
    {
        int N = (int) Math.ceil((double) values.length / (2 * sz));
        System.out.println(N);
        List<MergeSort> ms = new ArrayList<>(N);
        for (int i = 0; i < N; i++)
        {
            int lo = 2 * sz * i;
            int hi = Math.min(lo + 2 * sz, values.length) - 1;
            int mid = lo + sz - 1;
            ms.add(new MergeSort(values, aux, lo, mid, hi));
        }

        p.invokeAll(ms);
    }
    p.shutdown();
}
public void parallelboothuport()引发异常
{
ExecutorService p=Executors.newFixedThreadPool(8);
对于(int sz=1;sz
.Net代码:

    public void ParallelSort()
    {
        Start = DateTime.Now.Ticks;

        int lo = 0;
        int hi = values.Length - 1;
        int mid = values.Length / 2;
        int maxDepth = Environment.ProcessorCount;
        Sort(lo, mid, hi, maxDepth);

        End = DateTime.Now.Ticks;
    }
    private void Sort(int lo, int mid, int hi, int maxDepth)
    {
       if (hi - lo < 16)
            InsertionSort.Sort(values, lo, hi);
        else if (maxDepth > 0)
        {
            Parallel.Invoke(
                () => Sort(lo, (mid + lo)/2, mid, maxDepth-1),
                () => Sort(mid + 1, (hi + mid)/2, hi, maxDepth-1));
            Merge(lo, mid, hi);

        }
        else
        {
            Sort(lo, (mid + lo)/2, mid, 0);
            Sort(mid + 1, (hi + mid)/2, hi, 0);
            Merge(lo, mid, hi);
        }
    }
 public void ParallelBottomUpSort()
    {

        for (int sz = 1; sz < values.Length; sz *= 2)
            Parallel.For(0, (int) Math.Ceiling((double) values.Length/(2*sz)), i =>
            {
                int lo = 2 * sz * i;
                int hi = Math.Min(lo + 2 * sz, values.Length) - 1;
                int mid = lo + sz - 1;
                Merge(lo, mid, hi);
            });


    }   
public void parallellbottomuport()
{
对于(int sz=1;sz
{
int-lo=2*sz*i;
inthi=Math.Min(lo+2*sz,values.Length)-1;
int mid=lo+sz-1;
合并(低、中、高);
});
}   
看起来.Net的并行引擎通常更稳定。Java的并发包有其最坏和最好的情况

更新2:可编译源

    class Sort
{
    private int[] values, aux;

     public Sort(int[] values)
    {
        this.values = values;
        this.aux = new int[values.Length];
    }

     private unsafe void Merge(int lo, int mid, int hi)
     {
         if (lo == hi)
             return;

         if (mid + 1 <= hi && values[mid] <= values[mid + 1])
             return;

         Buffer.BlockCopy(values, sizeof(int) * lo, aux, sizeof(int) * lo, sizeof(int) * (hi - lo + 1));

         int i = lo;
         int j = mid + 1;

         fixed (int* a = values, b = aux)
         {
             for (int k = lo; k <= hi; k++)
             {
                 if (i > mid)
                     a[k] = b[j++];
                 else if (j > hi)
                     a[k] = b[i++];
                 else if (b[i] < b[j])
                     a[k] = b[i++];
                 else
                     a[k] = b[j++];
             }
         }
     }

     public void ParallelBottomUpSort()
     {
         for (int sz = 1; sz < values.Length; sz *= 2)
             Parallel.For(0, (int)Math.Ceiling((double)values.Length / (2 * sz)), i =>
             {
                 int lo = 2 * sz * i;
                 int hi = Math.Min(lo + 2 * sz, values.Length) - 1;
                 int mid = lo + sz - 1;
                 Merge(lo, mid, hi);
             });

     }

     public void ParallelTopDownSort()
     {
         int lo = 0;
         int hi = values.Length - 1;
         int mid = values.Length / 2;
         int maxDepth = (int)Math.Log(Environment.ProcessorCount, 2);
         ParallelTopDownSort(lo, mid, hi, maxDepth);

     }

     private void ParallelTopDownSort(int lo, int mid, int hi, int maxDepth)
     {
         if (hi - lo < 16)
             InsertionSort.Sort(values, lo, hi);
         else if (maxDepth > 0)
         {
             Parallel.Invoke(
                 () => ParallelTopDownSort(lo, (mid + lo) / 2, mid, maxDepth - 1),
                 () => ParallelTopDownSort(mid + 1, (hi + mid) / 2, hi, maxDepth - 1));
             Merge(lo, mid, hi);

         }
         else
         {
             ParallelTopDownSort(lo, (mid + lo) / 2, mid, 0);
             ParallelTopDownSort(mid + 1, (hi + mid) / 2, hi, 0);
             Merge(lo, mid, hi);
         }
     }

    static unsafe void Main(string[] args)
    {
        // path to file to be sorted
        String path = args[0];

        Console.WriteLine("Reading file");
        int[] values;

        // Loading file
        var loadStart = DateTime.Now.Ticks;
        long intCount = (new FileInfo(path)).Length / 4;
        values = new int[intCount];
        MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(path);
        MemoryMappedViewAccessor mma = mmf.CreateViewAccessor();
        byte* ptr = (byte*) 0;
        mma.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
        Marshal.Copy(new IntPtr(ptr), values, 0, (int) intCount);
        var loadEnd = DateTime.Now.Ticks;
        Console.WriteLine("Loaded in " + (loadEnd - loadStart) / (10000) + " ms");

        // Sorting
        Console.WriteLine("Sorting " + values.Length + " ints");
        Sort ms = new Sort(values);
        var start = DateTime.Now.Ticks;
        ms.ParallelBottomUpSort();
        var end = DateTime.Now.Ticks;
        Console.WriteLine("Done sorting");
        Console.WriteLine((end - start) / (1000 * 10000));
    }


}
类排序
{
私有int[]值,aux;
公共排序(int[]值)
{
这个值=值;
this.aux=新的int[values.Length];
}
私有不安全无效合并(内部低、内部中、内部高)
{
如果(低==高)
返回;
如果(中间+1
{
int-lo=2*sz*i;
inthi=Math.Min(lo+2*sz,values.Length)-1;
int mid=lo+sz-1;
合并(低、中、高);
});
}
public void ParallelTopDownSort()
{
int-lo=0;
int hi=值。长度-1;
int mid=数值。长度/2;
intmaxDepth=(int)Math.Log(Environment.ProcessorCount,2);
ParallelTopDownSort(低、中、高、最大深度);
}
私有void ParallelTopDownSort(int-lo、int-mid、int-hi、int-maxDepth)
{
如果(高-低<16)
InsertionSort.Sort(值、lo、hi);
否则如果(最大深度>0)
{
并行调用(
()=>ParallelTopDownSort(lo,(mid+lo)/2,mid,maxDepth-1),
()=>ParallelTopDownSort(mid+1,(hi+mid)/2,hi,maxDepth-1);
合并(低、中、高);
}
其他的
{
平行顶向下排序(低,(中+低)/2,中,0);
平行顶向下排序(中+1,(高+中)/2,高,0);
合并(低、中、高);
}
}
静态不安全void Main(字符串[]args)
{
//要排序的文件的路径
字符串路径=args[0];
Console.WriteLine(“读取文件”);
int[]值;
//加载文件
var loadStart=DateTime.Now.Ticks;
long intCount=(新文件信息(路径)).Length/4;
值=新整数[intCount];
MemoryMappedFile mmf=MemoryMappedFile.CreateFromFile(路径);
MemoryMappedViewAccessor mma=mmf.CreateViewAccessor();
字节*ptr=(字节*)0;
mma.SafeMemoryMappedViewHandle.AcquirePointer(参考ptr);
Marshal.Copy(新的IntPtr(ptr),值0,(int)intCount);
var loadEnd=DateTime.Now.Ticks;
Console.WriteLine(“装入”+(loadEnd-loadStart)/(10000)+“ms”);
//分类
Console.WriteLine(“排序”+values.Length+ints”);
排序ms=新排序(值);
var start=DateTime.Now.Ticks;
Parallelbottomuport女士();
var end=DateTime.Now.Ticks;
控制台写入线(“完成排序”);
控制台写入线((结束-开始)/(1000*10000));
}
}

在任何文件共享网站上发布图像(如果您觉得这样会更容易理解),并在此处发布链接,我将为您更新问题。不要使用
DateTime。现在使用
来衡量性能。试试秒表。另外,请提供Java版本-没有它,很难比较任何东西。到并发可视化工具映像的公共链接,请随意在这里发布:感谢Java代码。请使用
秒表
重新验证您的结果好吗?您使用的处理器数量为
最大深度
,您不应该使用处理器数量的2-log吗?