C# 基于C语言的字典排序数组算法#_C#_Sorting_Swap_Lexicographic

C# 基于C语言的字典排序数组算法#

c# sorting

C# 基于C语言的字典排序数组算法#,c#,sorting,swap,lexicographic,C#,Sorting,Swap,Lexicographic,我知道在处理vb6之前我已经问过这样的问题，而且速度太慢了，所以我决定使用C#来完成这项工作；现在同样的代码运行速度提高了一倍，但仍然太慢它之所以慢，是因为它从检查所有行的每一列末尾开始进行字典排序我相信，如果我从第一列开始排序过程，检查所有行，并逐第一个字节检测该列的最低行，可能还有多行具有相同的第一个低字节，并将这些行分组，以便下一步检查第二个（下一步）列查找第二个字节中的哪一个是最低字节（如果它们都是相同的），然后移动到下一列等。。如果检测到下一行字节的不同之处，则对第一个字节执行列代

我知道在处理vb6之前我已经问过这样的问题，而且速度太慢了，所以我决定使用C#来完成这项工作；现在同样的代码运行速度提高了一倍，但仍然太慢

它之所以慢，是因为它从检查所有行的每一列末尾开始进行字典排序

我相信，如果我从第一列开始排序过程，检查所有行，并逐第一个字节检测该列的最低行，可能还有多行具有相同的第一个低字节，并将这些行分组，以便下一步检查第二个（下一步）列查找第二个字节中的哪一个是最低字节（如果它们都是相同的），然后移动到下一列等。。如果检测到下一行字节的不同之处，则对第一个字节执行列代码，并继续查找第二个最低字节。。这就是我认为这个过程应该如何工作以获得良好的速度提升。。但不幸的是，我对这种分类技术感到非常困惑，最终使用了别人帮助我的东西

当前代码通过强制排序从最后一列开始对所有行进行排序。。然后它向左移动一列，并对每一行重新排序，直到到达第一列并排序为止。这是缓慢的，因为它没有明显的原因进行迭代

假设有256列256行，总共65536个数组元素。。使用当前代码，并表示必须对每行进行多次排序，直到每行获得正确的排序顺序。对于每一列，可能需要65536次迭代。因此，每次调用该函数时，估计总共有256*65536=16777216次迭代，这就是它速度慢的实际原因

我知道这是一个很大的要求，但如果有人有一些空闲时间，也许已经这样做过，可以帮助我，我会很感激

这是我到目前为止必须使用的代码

/// <summary>
/// Sorts the rows of a square byte matrix, stored row-major in <paramref name="data"/>,
/// into ascending lexicographic order in place.
/// </summary>
/// <remarks>
/// The row length (and row count) is floor(sqrt(data.Length)); bytes beyond
/// squareRoot * squareRoot are never touched. The sort runs one stable bubble sort
/// per column, from the last column to the first, so the final order is lexicographic.
/// Row numbers are recorded as bytes, so with more than 256 rows the returned
/// indexes wrap around.
/// </remarks>
/// <param name="data">Flat buffer holding the matrix; its rows are reordered in place.</param>
/// <returns>
/// An array whose element i is the original row number (cast to byte) of the row
/// that ends up at position i.
/// </returns>
byte[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
    long dataSize = data.Length;
    int squareRoot = (int)Math.Sqrt(dataSize);
    int lastRow = squareRoot - 1;
    byte[] tmpRow = new byte[squareRoot];
    byte[] lexicoGraphicalIndexes = new byte[squareRoot];

    for (int row = 0; row < squareRoot; row++) {
        lexicoGraphicalIndexes[row] = (byte)row;
    }

    // With zero or one row there is no neighbour to compare against. Without this
    // guard a single-row input would read one row past the end of the matrix.
    if (squareRoot < 2) {
        return lexicoGraphicalIndexes;
    }

    // Least-significant column first; each pass is stable because only strictly
    // greater neighbours are swapped, so earlier (less significant) ordering survives ties.
    for (int column = lastRow; column >= 0; column--) {
        bool rowSwapped;
        do {
            rowSwapped = false;
            for (int row = 0; row < lastRow; row++) {
                int upperOffset = row * squareRoot;
                int lowerOffset = upperOffset + squareRoot;

                if (data[upperOffset + column] > data[lowerOffset + column]) {
                    // Swap the two full rows through the scratch buffer.
                    Buffer.BlockCopy(data, upperOffset, tmpRow, 0, squareRoot);
                    Buffer.BlockCopy(data, lowerOffset, data, upperOffset, squareRoot);
                    Buffer.BlockCopy(tmpRow, 0, data, lowerOffset, squareRoot);
                    // Keep the index map in step with the row data.
                    swapBytes(ref lexicoGraphicalIndexes, row, row + 1);
                    rowSwapped = true;
                }
            }
        } while (rowSwapped);
    }
    return lexicoGraphicalIndexes;
}

/// <summary>
/// Exchanges the two elements of <paramref name="data"/> at the given positions.
/// </summary>
/// <param name="data">Array whose elements are exchanged in place.</param>
/// <param name="firstIndex">Position of the first element.</param>
/// <param name="secondIndex">Position of the second element.</param>
public void swapBytes(ref byte[] data, long firstIndex, long secondIndex) {
    // Hold on to the second value while the first one is moved over it.
    byte heldSecond = data[secondIndex];
    data[secondIndex] = data[firstIndex];
    data[firstIndex] = heldSecond;
}

byte[]sortarrayofArraysLexicalography（参考byte[]数据）{
字节[]词典编纂索引；
long dataSize=data.Length；
长正方形；
整数平方根；
int行=0；
布尔交换；
字节[]tmpRow；
平方根=（int）Math.Sqrt（dataSize）；
tmpRow=新字节[平方根]；
squareRootMinusOne=平方根-1；
lexicoGraphicalIndexes=新字节[平方根]；
for（短列=0；列=0；列-=1）{
做{
rowSwapped=false；
做{
如果（数据[（行*平方根）+列]>数据[（行+1）*平方根）+列]）{
//交换几份完整的行。
//将整行复制到tmpRow
Buffer.BlockCopy（数据，（行*平方根），tmpRow，0，平方根）；
//将第一行替换为第二行。
Buffer.BlockCopy（数据，（（行+1）*平方根），数据，（行*平方根），平方根）；
//将第二行替换为tmpRow
Buffer.BlockCopy（tmpRow，0，数据，（（行+1）*平方根），平方根）；
交换字节（参考词典，行，行+1）；
rowSwapped=true；
}
行++；
}而（行<1）；
行=0；
}while（行交换！=false）；
}
返回字典表；
}
public void交换字节（ref byte[]数据，long firstIndex，long secondIndex）{
字节tmpFirstByte=数据[firstIndex]；
数据[firstIndex]=数据[secondIndex]；
数据[secondIndex]=tmpFirstByte；
}

我必须说您的排序算法非常糟糕。即使没有任何优化和使用基本linq，速度也可以提高数十倍

我使用大小为N*N的数据进行了测试，其中N=200（我不确定下面的代码是否与您的代码完全匹配，是否100%正确，但至少您可以尝试查看结果）

公共类阵列比较程序：IComparer
{
公共整数比较（IEnumerable x，IEnumerable y）
{
var xenum=x.GetEnumerator（）；
var yenum=y.GetEnumerator（）；
while（xenum.MoveNext（）&&yenum.MoveNext（））
{
如果（xenum.Current！=yenum.Current）
返回xenum.Current-yenum.Current；
}
返回0；
}
}

PS:

Batch

是一种扩展方法，它来自于……（原文此处的链接已缺失）。最终我编写了这个冗长的函数，它在几次测试运行中似乎有效。目前还不确定它是否完美无缺，需要更多测试；做更多测试后我会更新它

    /// <summary>
    /// Sorts the rows of a square byte matrix, stored row-major in <paramref name="data"/>,
    /// into ascending lexicographic order in place.
    /// </summary>
    /// <remarks>
    /// The row length (and row count) is floor(sqrt(data.Length)); bytes beyond
    /// squareRoot * squareRoot are never touched. Rows are grouped by their first byte,
    /// lowest value first; each group is ordered by its remaining columns and then moved
    /// to the front of the still-unsorted region.
    /// </remarks>
    /// <param name="data">Flat buffer holding the matrix; its rows are reordered in place.</param>
    /// <returns>
    /// An array whose element i is the original row number of the row that ends up at position i.
    /// </returns>
    int[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
        long dataSize = data.Length;
        int squareRoot = (int)Math.Sqrt(dataSize);
        int[] lexicoGraphicalIndexes = new int[squareRoot];

        for(int row = 0; row < squareRoot; row++) {
            lexicoGraphicalIndexes[row] = row;
        }

        int previousLowestRowByte = -1; //first byte of the group placed by the previous pass (-1 before the first pass).
        int startRow = 0; //rows before this position are already in their final place.
        List<int> lowestRowIndexes = new List<int>();

        while(true) {
            lowestRowIndexes.Clear();

            //first step finds the smallest first-column byte that is greater than the previous group's byte.
            int currentLowestRowByte = -1;
            for(int row = 0; row < squareRoot; row++) {
                int firstByte = data[row * squareRoot];
                if(firstByte > previousLowestRowByte &&
                    (currentLowestRowByte == -1 || firstByte < currentLowestRowByte)) {
                    currentLowestRowByte = firstByte;
                }
            }

            //No first byte above the previous one is left, so every row has been placed.
            if(currentLowestRowByte == -1) {
                break;
            }
            previousLowestRowByte = currentLowestRowByte;

            //second step collects every row that starts with the current lowest byte (ascending positions).
            for(int row = 0; row < squareRoot; row++) {
                if(data[row * squareRoot] == currentLowestRowByte) {
                    lowestRowIndexes.Add(row);
                }
            }

            //third step bubble-sorts the rows of the group among the positions they already occupy.
            if(lowestRowIndexes.Count > 1) {
                bool rowSwapped;
                do {
                    rowSwapped = false;
                    for(int i = 0; i + 1 < lowestRowIndexes.Count; i++) {
                        int upperRow = lowestRowIndexes[i];
                        int lowerRow = lowestRowIndexes[i + 1];
                        int upperOffset = upperRow * squareRoot;
                        int lowerOffset = lowerRow * squareRoot;

                        //Column 0 is equal across the group, so look for the first differing column
                        //from column 1 on. The bound is checked before every read so that identical
                        //rows never compare bytes belonging to the following row.
                        int column = 1;
                        while(column < squareRoot && data[upperOffset + column] == data[lowerOffset + column]) {
                            column++;
                        }

                        //Only a strictly greater row is swapped; identical rows keep their order.
                        if(column < squareRoot && data[upperOffset + column] > data[lowerOffset + column]) {
                            swapBytesRow(ref data, upperRow, lowerRow, squareRoot);
                            swapBytes(ref lexicoGraphicalIndexes, upperRow, lowerRow);
                            rowSwapped = true;
                        }
                    }
                } while(rowSwapped);
            }

            //fourth step moves the (now ordered) group to the front of the unsorted region.
            foreach(int row in lowestRowIndexes) {
                //Rows already sitting at their final position need no swap.
                if(row != startRow) {
                    swapBytesRow(ref data, startRow, row, squareRoot);
                    swapBytes(ref lexicoGraphicalIndexes, startRow, row);
                }
                startRow++;
            }
        }
        return lexicoGraphicalIndexes;
    }

我想要一个独立的东西，我知道我没有提到这是我的错。真的不喜欢像LinQ这样的新技术加上我测试后需要的额外文件，仍然无法工作无法理解

ThrowIfNull

，

ThrowIfNonPositive

可能我还必须包含一些其他文件。。最后我自己写了这篇文章，花了我整整4个小时，但现在似乎写得更快了。。我将把它作为答案发布。如果数组长度不同，您的

ArrayComparer

将无法正常工作。这可能不会影响提问者，但会使它无法成为通用比较器。代码的问题在于使用了一种时间复杂度为O（N*N）的排序算法（类似于冒泡排序）。你应该实现一个更好的排序算法，或者像我一样使用.NET的内置排序函数。我真的看不到其他方法。我对第一个字节相同的行进行排序，然后交换第一行、第二行等等。就这样。在极少数情况下，主排序代码最多可排序5-7行。只包含公共第一个字节的行只包含一次，即立即数

// Split the flat buffer into batches of N elements and copy each batch into its own array.
// NOTE(review): Batch is not declared in this file and is not a System.Linq method;
// presumably a third-party extension (e.g. MoreLINQ) — confirm. N is presumably the row length.
var list = data.Batch(N).Select(x => x.ToArray()).ToList();
// Sort the list of row arrays in place, using ArrayComparer to order them.
list.Sort(new ArrayComparer());

/// <summary>
/// Orders byte sequences lexicographically: the first differing element decides,
/// and when one sequence is a prefix of the other the shorter one sorts first.
/// </summary>
public class ArrayComparer : IComparer<IEnumerable<byte>>
{
    /// <summary>
    /// Compares two byte sequences element by element.
    /// </summary>
    /// <param name="x">First sequence; null sorts before any non-null sequence.</param>
    /// <param name="y">Second sequence; null sorts before any non-null sequence.</param>
    /// <returns>
    /// A negative value when <paramref name="x"/> sorts before <paramref name="y"/>,
    /// a positive value when it sorts after, and 0 when both are equal.
    /// </returns>
    public int Compare(IEnumerable<byte> x, IEnumerable<byte> y)
    {
        if (ReferenceEquals(x, y))
        {
            return 0;
        }
        if (x == null)
        {
            return -1;
        }
        if (y == null)
        {
            return 1;
        }

        // Enumerators are disposable; release them even on an early return.
        using (IEnumerator<byte> xenum = x.GetEnumerator())
        using (IEnumerator<byte> yenum = y.GetEnumerator())
        {
            while (true)
            {
                bool xHasItem = xenum.MoveNext();
                bool yHasItem = yenum.MoveNext();

                if (!xHasItem || !yHasItem)
                {
                    // Both ran out together: equal. Otherwise the sequence that
                    // ended first is a prefix of the other and sorts first.
                    if (xHasItem == yHasItem)
                    {
                        return 0;
                    }
                    return xHasItem ? 1 : -1;
                }

                if (xenum.Current != yenum.Current)
                {
                    return xenum.Current - yenum.Current;
                }
            }
        }
    }
}

    /// <summary>
    /// Sorts the rows of a square byte matrix, stored row-major in <paramref name="data"/>,
    /// into ascending lexicographic order in place.
    /// </summary>
    /// <remarks>
    /// The row length (and row count) is floor(sqrt(data.Length)); bytes beyond
    /// squareRoot * squareRoot are never touched. Rows are grouped by their first byte,
    /// lowest value first; each group is ordered by its remaining columns and then moved
    /// to the front of the still-unsorted region.
    /// </remarks>
    /// <param name="data">Flat buffer holding the matrix; its rows are reordered in place.</param>
    /// <returns>
    /// An array whose element i is the original row number of the row that ends up at position i.
    /// </returns>
    int[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
        long dataSize = data.Length;
        int squareRoot = (int)Math.Sqrt(dataSize);
        int[] lexicoGraphicalIndexes = new int[squareRoot];

        for(int row = 0; row < squareRoot; row++) {
            lexicoGraphicalIndexes[row] = row;
        }

        int previousLowestRowByte = -1; //first byte of the group placed by the previous pass (-1 before the first pass).
        int startRow = 0; //rows before this position are already in their final place.
        List<int> lowestRowIndexes = new List<int>();

        while(true) {
            lowestRowIndexes.Clear();

            //first step finds the smallest first-column byte that is greater than the previous group's byte.
            int currentLowestRowByte = -1;
            for(int row = 0; row < squareRoot; row++) {
                int firstByte = data[row * squareRoot];
                if(firstByte > previousLowestRowByte &&
                    (currentLowestRowByte == -1 || firstByte < currentLowestRowByte)) {
                    currentLowestRowByte = firstByte;
                }
            }

            //No first byte above the previous one is left, so every row has been placed.
            if(currentLowestRowByte == -1) {
                break;
            }
            previousLowestRowByte = currentLowestRowByte;

            //second step collects every row that starts with the current lowest byte (ascending positions).
            for(int row = 0; row < squareRoot; row++) {
                if(data[row * squareRoot] == currentLowestRowByte) {
                    lowestRowIndexes.Add(row);
                }
            }

            //third step bubble-sorts the rows of the group among the positions they already occupy.
            if(lowestRowIndexes.Count > 1) {
                bool rowSwapped;
                do {
                    rowSwapped = false;
                    for(int i = 0; i + 1 < lowestRowIndexes.Count; i++) {
                        int upperRow = lowestRowIndexes[i];
                        int lowerRow = lowestRowIndexes[i + 1];
                        int upperOffset = upperRow * squareRoot;
                        int lowerOffset = lowerRow * squareRoot;

                        //Column 0 is equal across the group, so look for the first differing column
                        //from column 1 on. The bound is checked before every read so that identical
                        //rows never compare bytes belonging to the following row.
                        int column = 1;
                        while(column < squareRoot && data[upperOffset + column] == data[lowerOffset + column]) {
                            column++;
                        }

                        //Only a strictly greater row is swapped; identical rows keep their order.
                        if(column < squareRoot && data[upperOffset + column] > data[lowerOffset + column]) {
                            swapBytesRow(ref data, upperRow, lowerRow, squareRoot);
                            swapBytes(ref lexicoGraphicalIndexes, upperRow, lowerRow);
                            rowSwapped = true;
                        }
                    }
                } while(rowSwapped);
            }

            //fourth step moves the (now ordered) group to the front of the unsorted region.
            foreach(int row in lowestRowIndexes) {
                //Rows already sitting at their final position need no swap.
                if(row != startRow) {
                    swapBytesRow(ref data, startRow, row, squareRoot);
                    swapBytes(ref lexicoGraphicalIndexes, startRow, row);
                }
                startRow++;
            }
        }
        return lexicoGraphicalIndexes;
    }

    /// <summary>
    /// Exchanges the two elements of <paramref name="data"/> at the given positions.
    /// </summary>
    /// <param name="data">Array whose elements are exchanged in place.</param>
    /// <param name="firstIndex">Position of the first element.</param>
    /// <param name="secondIndex">Position of the second element.</param>
    public void swapBytes(ref byte[] data, long firstIndex, long secondIndex) {
        // Hold on to the second value while the first one is moved over it.
        byte heldSecond = data[secondIndex];
        data[secondIndex] = data[firstIndex];
        data[firstIndex] = heldSecond;
    }

    /// <summary>
    /// Exchanges the two elements of <paramref name="data"/> at the given positions.
    /// </summary>
    /// <param name="data">Array whose elements are exchanged in place.</param>
    /// <param name="firstIndex">Position of the first element.</param>
    /// <param name="secondIndex">Position of the second element.</param>
    public void swapBytes(ref int[] data, long firstIndex, long secondIndex) {
        // Hold on to the second value while the first one is moved over it.
        int heldSecond = data[secondIndex];
        data[secondIndex] = data[firstIndex];
        data[firstIndex] = heldSecond;
    }

    /// <summary>
    /// Exchanges two rows of <paramref name="rowSize"/> bytes each inside the flat buffer
    /// <paramref name="data"/>; row r starts at byte offset r * rowSize.
    /// </summary>
    /// <param name="data">Flat buffer whose rows are exchanged in place.</param>
    /// <param name="firstRowIndex">Row number of the first row.</param>
    /// <param name="secondRowIndex">Row number of the second row.</param>
    /// <param name="rowSize">Number of bytes per row.</param>
    public void swapBytesRow(ref byte[] data, int firstRowIndex, int secondRowIndex, int rowSize) {
        int firstOffset = firstRowIndex * rowSize;
        int secondOffset = secondRowIndex * rowSize;
        byte[] heldSecondRow = new byte[rowSize];

        // Park the second row in the scratch buffer.
        Buffer.BlockCopy(data, secondOffset, heldSecondRow, 0, rowSize);
        // Move the first row into the second row's slot.
        Buffer.BlockCopy(data, firstOffset, data, secondOffset, rowSize);
        // Drop the parked row into the first row's slot.
        Buffer.BlockCopy(heldSecondRow, 0, data, firstOffset, rowSize);
    }