Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/sorting/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 基于C语言的字典排序数组算法#_C#_Sorting_Swap_Lexicographic - Fatal编程技术网

C# 基于C语言的字典排序数组算法#

C# 基于C语言的字典排序数组算法#,c#,sorting,swap,lexicographic,C#,Sorting,Swap,Lexicographic,我知道在处理vb6之前我已经问过这样的问题,而且速度太慢了,所以我决定使用C#来完成这项工作;现在同样的代码运行速度提高了一倍,但仍然太慢 它之所以慢,是因为它从检查所有行的每一列末尾开始进行字典排序 我相信,如果我从第一列开始排序过程,检查所有行,并逐第一个字节检测该列的最低行,可能还有多行具有相同的第一个低字节,并将这些行分组,以便下一步检查第二个(下一步)列查找第二个字节中的哪一个是最低字节(如果它们都是相同的),然后移动到下一列等。。如果检测到下一行字节的不同之处,则对第一个字节执行列代

我知道在处理vb6之前我已经问过这样的问题,而且速度太慢了,所以我决定使用C#来完成这项工作;现在同样的代码运行速度提高了一倍,但仍然太慢

它之所以慢,是因为它从检查所有行的每一列末尾开始进行字典排序

我相信,如果我从第一列开始排序过程,检查所有行,并逐第一个字节检测该列的最低行,可能还有多行具有相同的第一个低字节,并将这些行分组,以便下一步检查第二个(下一步)列查找第二个字节中的哪一个是最低字节(如果它们都是相同的),然后移动到下一列等。。如果检测到下一行字节的不同之处,则对第一个字节执行列代码,并继续查找第二个最低字节。。这就是我认为这个过程应该如何工作以获得良好的速度提升。。但不幸的是,我对这种分类技术感到非常困惑,最终使用了别人帮助我的东西

当前代码通过强制排序从最后一列开始对所有行进行排序。。然后它向左移动一列,并对每一行重新排序,直到到达第一列并排序为止。这是缓慢的,因为它没有明显的原因进行迭代

假设有256列256行,总共65536个数组元素。。使用当前代码,并表示必须对每行进行多次排序,直到每行获得正确的排序顺序。对于每一列,可能需要65536次迭代。因此,每次调用该函数时,估计总共有256*65536=16777216次迭代,这就是它速度慢的实际原因

我知道这是一个很大的要求,但如果有人有一些空闲时间,也许已经这样做过,可以帮助我,我会很感激

这是我到目前为止必须使用的代码

byte[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
    byte[] lexicoGraphicalIndexes;
    long dataSize = data.Length;
    long squareRootMinusOne;
    int squareRoot;
    int row = 0;
    bool rowSwapped;
    byte[] tmpRow;

    squareRoot = (int)Math.Sqrt(dataSize);
    tmpRow = new byte[squareRoot];
    squareRootMinusOne = squareRoot - 1;
    lexicoGraphicalIndexes = new byte[squareRoot];

    for(short column = 0; column < lexicoGraphicalIndexes.Length; column++) {
        lexicoGraphicalIndexes[column] = (byte)column;
    }

    for(long column = squareRootMinusOne; column >= 0; column -= 1) {
        do {
            rowSwapped = false;
            do {
                if(data[(row * squareRoot) + column] > data[((row + 1) * squareRoot) + column]) {
                    //Swaps a full row in a few copies.
                    //Copies full row to tmpRow
                    Buffer.BlockCopy(data, (row * squareRoot), tmpRow, 0, squareRoot);
                    //Replace first row with second row.
                    Buffer.BlockCopy(data, ((row + 1) * squareRoot), data, (row * squareRoot), squareRoot);
                    //Replace second row with tmpRow
                    Buffer.BlockCopy(tmpRow, 0, data, ((row + 1) * squareRoot), squareRoot);
                    swapBytes(ref lexicoGraphicalIndexes, row, row + 1);
                    rowSwapped = true;
                }
                row++;
            } while (row < squareRootMinusOne);
            row = 0;
        } while (rowSwapped != false);
    }
    return lexicoGraphicalIndexes;
}

public void swapBytes(ref byte[] data, long firstIndex, long secondIndex) {
    byte tmpFirstByte = data[firstIndex];
    data[firstIndex] = data[secondIndex];
    data[secondIndex] = tmpFirstByte;
}
byte[]sortarrayofArraysLexicalography(参考byte[]数据){
字节[]词典编纂索引;
long dataSize=data.Length;
长正方形;
整数平方根;
int行=0;
布尔交换;
字节[]tmpRow;
平方根=(int)Math.Sqrt(dataSize);
tmpRow=新字节[平方根];
squareRootMinusOne=平方根-1;
lexicoGraphicalIndexes=新字节[平方根];
for(短列=0;列=0;列-=1){
做{
rowSwapped=false;
做{
如果(数据[(行*平方根)+列]>数据[(行+1)*平方根)+列]){
//交换几份完整的行。
//将整行复制到tmpRow
Buffer.BlockCopy(数据,(行*平方根),tmpRow,0,平方根);
//将第一行替换为第二行。
Buffer.BlockCopy(数据,((行+1)*平方根),数据,(行*平方根),平方根);
//将第二行替换为tmpRow
Buffer.BlockCopy(tmpRow,0,数据,((行+1)*平方根),平方根);
交换字节(参考词典,行,行+1);
rowSwapped=true;
}
行++;
}而(行<1);
行=0;
}while(行交换!=false);
}
返回字典表;
}
public void交换字节(ref byte[]数据,long firstIndex,long secondIndex){
字节tmpFirstByte=数据[firstIndex];
数据[firstIndex]=数据[secondIndex];
数据[secondIndex]=tmpFirstByte;
}

我必须说您的排序算法非常糟糕。即使没有任何优化和使用基本linq,速度也可以提高数十倍

我使用大小为N*N的数据进行了测试,其中N=200(我不确定下面的代码是否与您的代码完全匹配,是否100%正确,但至少您可以尝试查看结果)

-

公共类阵列比较程序:IComparer
{
公共整数比较(IEnumerable x,IEnumerable y)
{
var xenum=x.GetEnumerator();
var yenum=y.GetEnumerator();
while(xenum.MoveNext()&¥um.MoveNext())
{
如果(xenum.Current!=yenum.Current)
返回xenum.Current-yenum.Current;
}
返回0;
}
}

PS:
Batch
是一种扩展方法,它来自于最终编写了这个长怪物,但它似乎在一些测试运行中起到了作用。。不确定它是否完美无缺需要更多的测试,我会在做更多测试时更新它

    int[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
        int[] lexicoGraphicalIndexes;
        long dataSize = data.Length;
        int squareRoot;
        bool rowSwapped;

        squareRoot = (int)Math.Sqrt(dataSize);
        lexicoGraphicalIndexes = new int[squareRoot];

        for(int column = 0; column < lexicoGraphicalIndexes.Length; column++) {
            lexicoGraphicalIndexes[column] = column;
        }

        byte currentLowestRowByte = 255; //set to highest to avoid unassigned local variable error.
        int previousLowestRowByte = -1; //this is only used after the second pass.
        int lowestRowIndex = -1; //hopefully this won't mess anything up.
        List<int> lowestRowIndexes = new List<int>();
        bool stillSorting = true;
        int startRow = 0; //which row to start with, as the sorting process gets more sorted this number increases.
        int startColumn = 0; //first column

        while(stillSorting) {
            //Resets
            lowestRowIndexes.Clear();
            startColumn = 0;
            currentLowestRowByte = 255;
            lowestRowIndex = -1;

            //first step finds the lowest row in the first column
            for(int row = 0; row < squareRoot; row += 1) {
                if(data[(row * squareRoot) + startColumn] <= currentLowestRowByte && 
                    data[(row * squareRoot) + startColumn] > previousLowestRowByte) {
                    currentLowestRowByte = data[(row * squareRoot) + startColumn];
                    lowestRowIndex = row;
                }
            }

            //Resets for next pass.
            previousLowestRowByte = currentLowestRowByte;

            //Check if sorting process is already finished. (No matches found from step 1).
            if(lowestRowIndex == -1) {
                stillSorting = false;
                break;
            }

            //second step finds all the similar rows with the current lowestRowByte.
            for(int row = 0; row < squareRoot; row += 1) {
                if(data[(row * squareRoot) + startColumn] == currentLowestRowByte) {
                    lowestRowIndexes.Add(row);
                }
            }

            //third step loops through all lowestRowIndexes to find which one comes first, second, third, etc...
            if(lowestRowIndexes.Count > 1) {
                //This sorts the same rows, rows*rows amount of times, until they are sorted correctly.
                rowSwapped = true;
                while(rowSwapped != false) {
                    rowSwapped = false;
                    for (int row = 0; row < lowestRowIndexes.Count; row++)
                    {
                        if((row+1) >= lowestRowIndexes.Count)
                            break;
                        //Current first row byte checked with Next first row byte in lowestRowIndexes.
                        //If both are equal keep going unto next column until a break is found, if any break.
                        startColumn = 1;
                        while(rowSwapped == false) {
                            //Reached beyond the last column.
                            if(startColumn == squareRoot)
                                break;

                            if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] == data[(lowestRowIndexes[row+1] * squareRoot) + startColumn])
                                startColumn++;

                            if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] < data[(lowestRowIndexes[row+1] * squareRoot) + startColumn]) {
                                break; //Sorted already, get out.
                            } else if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] > data[(lowestRowIndexes[row+1] * squareRoot) + startColumn]) {
                                swapBytesRow(ref data, lowestRowIndexes[row], lowestRowIndexes[row+1], squareRoot);
                                swapBytes(ref lexicoGraphicalIndexes, lowestRowIndexes[row], lowestRowIndexes[row+1]);
                                rowSwapped = true; //a swap has occurred.
                                break;
                            }
                        }
                    }
                }

                //forth step re-sorts all the pre-sorted lowestRowIndexes into master array, using startRow variable.
                foreach(int row in lowestRowIndexes) {

                    //First checks if row is already in the proper sorted location.
                    if(row != startRow) {
                        swapBytesRow(ref data, startRow, row, squareRoot);
                        swapBytes(ref lexicoGraphicalIndexes, startRow, row);
                        startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
                    } else {
                        startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
                    }                     
                }
            } else {
                //Only one instance of this lowestRowByte existed. so obviously this is the next best sorted match.
                swapBytesRow(ref data, startRow, lowestRowIndex, squareRoot);
                swapBytes(ref lexicoGraphicalIndexes, startRow, lowestRowIndex);
                startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
            }
        }
        return lexicoGraphicalIndexes;
    }


我想要一个独立的东西,我知道我没有提到这是我的错。真的不喜欢像LinQ这样的新技术加上我测试后需要的额外文件,仍然无法工作无法理解
ThrowIfNull
ThrowIfNonPositive
可能我还必须包含一些其他文件。。最后我自己写了这篇文章,花了我整整4个小时,但现在似乎写得更快了。。我将把它作为答案发布。如果数组长度不同,您的
ArrayComparer
将无法正常工作。可能不会影响OP,但会阻止它成为通用比较器。代码的问题是使用了一种排序算法(类似于气泡排序),其复杂性为O(N*N)。你应该实现一个更好的排序算法,或者像我一样使用.Net的内置排序函数。我真的看不到其他方法。我对第一个字节相同的行进行排序,然后交换第一行、第二行等等。。就这样。在极少数情况下,主排序代码最多可排序5-7行。。只包含公共第一个字节的行只包含一次,即立即数
var list = data.Batch(N).Select(x => x.ToArray()).ToList();
list.Sort(new ArrayComparer());
public class ArrayComparer : IComparer<IEnumerable<byte>>
{
    public int Compare(IEnumerable<byte> x, IEnumerable<byte> y)
    {
        var xenum = x.GetEnumerator();
        var yenum = y.GetEnumerator();
        while (xenum.MoveNext() && yenum.MoveNext())
        {
            if (xenum.Current != yenum.Current) 
                   return xenum.Current - yenum.Current;
        }
        return 0;
    }
}
    int[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
        int[] lexicoGraphicalIndexes;
        long dataSize = data.Length;
        int squareRoot;
        bool rowSwapped;

        squareRoot = (int)Math.Sqrt(dataSize);
        lexicoGraphicalIndexes = new int[squareRoot];

        for(int column = 0; column < lexicoGraphicalIndexes.Length; column++) {
            lexicoGraphicalIndexes[column] = column;
        }

        byte currentLowestRowByte = 255; //set to highest to avoid unassigned local variable error.
        int previousLowestRowByte = -1; //this is only used after the second pass.
        int lowestRowIndex = -1; //hopefully this won't mess anything up.
        List<int> lowestRowIndexes = new List<int>();
        bool stillSorting = true;
        int startRow = 0; //which row to start with, as the sorting process gets more sorted this number increases.
        int startColumn = 0; //first column

        while(stillSorting) {
            //Resets
            lowestRowIndexes.Clear();
            startColumn = 0;
            currentLowestRowByte = 255;
            lowestRowIndex = -1;

            //first step finds the lowest row in the first column
            for(int row = 0; row < squareRoot; row += 1) {
                if(data[(row * squareRoot) + startColumn] <= currentLowestRowByte && 
                    data[(row * squareRoot) + startColumn] > previousLowestRowByte) {
                    currentLowestRowByte = data[(row * squareRoot) + startColumn];
                    lowestRowIndex = row;
                }
            }

            //Resets for next pass.
            previousLowestRowByte = currentLowestRowByte;

            //Check if sorting process is already finished. (No matches found from step 1).
            if(lowestRowIndex == -1) {
                stillSorting = false;
                break;
            }

            //second step finds all the similar rows with the current lowestRowByte.
            for(int row = 0; row < squareRoot; row += 1) {
                if(data[(row * squareRoot) + startColumn] == currentLowestRowByte) {
                    lowestRowIndexes.Add(row);
                }
            }

            //third step loops through all lowestRowIndexes to find which one comes first, second, third, etc...
            if(lowestRowIndexes.Count > 1) {
                //This sorts the same rows, rows*rows amount of times, until they are sorted correctly.
                rowSwapped = true;
                while(rowSwapped != false) {
                    rowSwapped = false;
                    for (int row = 0; row < lowestRowIndexes.Count; row++)
                    {
                        if((row+1) >= lowestRowIndexes.Count)
                            break;
                        //Current first row byte checked with Next first row byte in lowestRowIndexes.
                        //If both are equal keep going unto next column until a break is found, if any break.
                        startColumn = 1;
                        while(rowSwapped == false) {
                            //Reached beyond the last column.
                            if(startColumn == squareRoot)
                                break;

                            if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] == data[(lowestRowIndexes[row+1] * squareRoot) + startColumn])
                                startColumn++;

                            if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] < data[(lowestRowIndexes[row+1] * squareRoot) + startColumn]) {
                                break; //Sorted already, get out.
                            } else if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] > data[(lowestRowIndexes[row+1] * squareRoot) + startColumn]) {
                                swapBytesRow(ref data, lowestRowIndexes[row], lowestRowIndexes[row+1], squareRoot);
                                swapBytes(ref lexicoGraphicalIndexes, lowestRowIndexes[row], lowestRowIndexes[row+1]);
                                rowSwapped = true; //a swap has occurred.
                                break;
                            }
                        }
                    }
                }

                //forth step re-sorts all the pre-sorted lowestRowIndexes into master array, using startRow variable.
                foreach(int row in lowestRowIndexes) {

                    //First checks if row is already in the proper sorted location.
                    if(row != startRow) {
                        swapBytesRow(ref data, startRow, row, squareRoot);
                        swapBytes(ref lexicoGraphicalIndexes, startRow, row);
                        startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
                    } else {
                        startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
                    }                     
                }
            } else {
                //Only one instance of this lowestRowByte existed. so obviously this is the next best sorted match.
                swapBytesRow(ref data, startRow, lowestRowIndex, squareRoot);
                swapBytes(ref lexicoGraphicalIndexes, startRow, lowestRowIndex);
                startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
            }
        }
        return lexicoGraphicalIndexes;
    }
    public void swapBytes(ref byte[] data, long firstIndex, long secondIndex) {
        byte tmpFirstByte = data[firstIndex];
        data[firstIndex] = data[secondIndex];
        data[secondIndex] = tmpFirstByte;
    }
    public void swapBytes(ref int[] data, long firstIndex, long secondIndex) {
        int tmpFirstByte = data[firstIndex];
        data[firstIndex] = data[secondIndex];
        data[secondIndex] = tmpFirstByte;
    }
    public void swapBytesRow(ref byte[] data, int firstRowIndex, int secondRowIndex, int rowSize) {
        byte[] tmpFirstRowBytes = new byte[rowSize];
        //Copies full row to tmpFirstRowBytes
        Buffer.BlockCopy(data, (firstRowIndex * rowSize), tmpFirstRowBytes, 0, rowSize);
        //Replace first row with second row.
        Buffer.BlockCopy(data, (secondRowIndex * rowSize), data, (firstRowIndex * rowSize), rowSize);
        //Replace second row with tmpFirstRowBytes
        Buffer.BlockCopy(tmpFirstRowBytes, 0, data, (secondRowIndex * rowSize), rowSize);
    }