C# 基于C语言的字典排序数组算法#
我知道在处理vb6之前我已经问过这样的问题,而且速度太慢了,所以我决定使用C#来完成这项工作;现在同样的代码运行速度提高了一倍,但仍然太慢 它之所以慢,是因为它从检查所有行的每一列末尾开始进行字典排序 我相信,如果我从第一列开始排序过程,检查所有行,并逐第一个字节检测该列的最低行,可能还有多行具有相同的第一个低字节,并将这些行分组,以便下一步检查第二个(下一步)列查找第二个字节中的哪一个是最低字节(如果它们都是相同的),然后移动到下一列等。。如果检测到下一行字节的不同之处,则对第一个字节执行列代码,并继续查找第二个最低字节。。这就是我认为这个过程应该如何工作以获得良好的速度提升。。但不幸的是,我对这种分类技术感到非常困惑,最终使用了别人帮助我的东西 当前代码通过强制排序从最后一列开始对所有行进行排序。。然后它向左移动一列,并对每一行重新排序,直到到达第一列并排序为止。这是缓慢的,因为它没有明显的原因进行迭代 假设有256列256行,总共65536个数组元素。。使用当前代码,并表示必须对每行进行多次排序,直到每行获得正确的排序顺序。对于每一列,可能需要65536次迭代。因此,每次调用该函数时,估计总共有256*65536=16777216次迭代,这就是它速度慢的实际原因 我知道这是一个很大的要求,但如果有人有一些空闲时间,也许已经这样做过,可以帮助我,我会很感激 这是我到目前为止必须使用的代码C# 基于C语言的字典排序数组算法#,c#,sorting,swap,lexicographic,C#,Sorting,Swap,Lexicographic,我知道在处理vb6之前我已经问过这样的问题,而且速度太慢了,所以我决定使用C#来完成这项工作;现在同样的代码运行速度提高了一倍,但仍然太慢 它之所以慢,是因为它从检查所有行的每一列末尾开始进行字典排序 我相信,如果我从第一列开始排序过程,检查所有行,并逐第一个字节检测该列的最低行,可能还有多行具有相同的第一个低字节,并将这些行分组,以便下一步检查第二个(下一步)列查找第二个字节中的哪一个是最低字节(如果它们都是相同的),然后移动到下一列等。。如果检测到下一行字节的不同之处,则对第一个字节执行列代
byte[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
byte[] lexicoGraphicalIndexes;
long dataSize = data.Length;
long squareRootMinusOne;
int squareRoot;
int row = 0;
bool rowSwapped;
byte[] tmpRow;
squareRoot = (int)Math.Sqrt(dataSize);
tmpRow = new byte[squareRoot];
squareRootMinusOne = squareRoot - 1;
lexicoGraphicalIndexes = new byte[squareRoot];
for(short column = 0; column < lexicoGraphicalIndexes.Length; column++) {
lexicoGraphicalIndexes[column] = (byte)column;
}
for(long column = squareRootMinusOne; column >= 0; column -= 1) {
do {
rowSwapped = false;
do {
if(data[(row * squareRoot) + column] > data[((row + 1) * squareRoot) + column]) {
//Swaps a full row in a few copies.
//Copies full row to tmpRow
Buffer.BlockCopy(data, (row * squareRoot), tmpRow, 0, squareRoot);
//Replace first row with second row.
Buffer.BlockCopy(data, ((row + 1) * squareRoot), data, (row * squareRoot), squareRoot);
//Replace second row with tmpRow
Buffer.BlockCopy(tmpRow, 0, data, ((row + 1) * squareRoot), squareRoot);
swapBytes(ref lexicoGraphicalIndexes, row, row + 1);
rowSwapped = true;
}
row++;
} while (row < squareRootMinusOne);
row = 0;
} while (rowSwapped != false);
}
return lexicoGraphicalIndexes;
}
public void swapBytes(ref byte[] data, long firstIndex, long secondIndex) {
byte tmpFirstByte = data[firstIndex];
data[firstIndex] = data[secondIndex];
data[secondIndex] = tmpFirstByte;
}
byte[]sortarrayofArraysLexicalography(参考byte[]数据){
字节[]词典编纂索引;
long dataSize=data.Length;
长正方形;
整数平方根;
int行=0;
布尔交换;
字节[]tmpRow;
平方根=(int)Math.Sqrt(dataSize);
tmpRow=新字节[平方根];
squareRootMinusOne=平方根-1;
lexicoGraphicalIndexes=新字节[平方根];
for(短列=0;列=0;列-=1){
做{
rowSwapped=false;
做{
如果(数据[(行*平方根)+列]>数据[(行+1)*平方根)+列]){
//交换几份完整的行。
//将整行复制到tmpRow
Buffer.BlockCopy(数据,(行*平方根),tmpRow,0,平方根);
//将第一行替换为第二行。
Buffer.BlockCopy(数据,((行+1)*平方根),数据,(行*平方根),平方根);
//将第二行替换为tmpRow
Buffer.BlockCopy(tmpRow,0,数据,((行+1)*平方根),平方根);
交换字节(参考词典,行,行+1);
rowSwapped=true;
}
行++;
}而(行<1);
行=0;
}while(行交换!=false);
}
返回字典表;
}
public void交换字节(ref byte[]数据,long firstIndex,long secondIndex){
字节tmpFirstByte=数据[firstIndex];
数据[firstIndex]=数据[secondIndex];
数据[secondIndex]=tmpFirstByte;
}
我必须说您的排序算法非常糟糕。即使没有任何优化和使用基本linq,速度也可以提高数十倍
我使用大小为N*N的数据进行了测试,其中N=200(我不确定下面的代码是否与您的代码完全匹配,是否100%正确,但至少您可以尝试查看结果)
-
公共类阵列比较程序:IComparer
{
公共整数比较(IEnumerable x,IEnumerable y)
{
var xenum=x.GetEnumerator();
var yenum=y.GetEnumerator();
while(xenum.MoveNext()&¥um.MoveNext())
{
如果(xenum.Current!=yenum.Current)
返回xenum.Current-yenum.Current;
}
返回0;
}
}
PS:
Batch
是一种扩展方法,它来自于最终编写了这个长怪物,但它似乎在一些测试运行中起到了作用。。不确定它是否完美无缺需要更多的测试,我会在做更多测试时更新它
int[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
int[] lexicoGraphicalIndexes;
long dataSize = data.Length;
int squareRoot;
bool rowSwapped;
squareRoot = (int)Math.Sqrt(dataSize);
lexicoGraphicalIndexes = new int[squareRoot];
for(int column = 0; column < lexicoGraphicalIndexes.Length; column++) {
lexicoGraphicalIndexes[column] = column;
}
byte currentLowestRowByte = 255; //set to highest to avoid unassigned local variable error.
int previousLowestRowByte = -1; //this is only used after the second pass.
int lowestRowIndex = -1; //hopefully this won't mess anything up.
List<int> lowestRowIndexes = new List<int>();
bool stillSorting = true;
int startRow = 0; //which row to start with, as the sorting process gets more sorted this number increases.
int startColumn = 0; //first column
while(stillSorting) {
//Resets
lowestRowIndexes.Clear();
startColumn = 0;
currentLowestRowByte = 255;
lowestRowIndex = -1;
//first step finds the lowest row in the first column
for(int row = 0; row < squareRoot; row += 1) {
if(data[(row * squareRoot) + startColumn] <= currentLowestRowByte &&
data[(row * squareRoot) + startColumn] > previousLowestRowByte) {
currentLowestRowByte = data[(row * squareRoot) + startColumn];
lowestRowIndex = row;
}
}
//Resets for next pass.
previousLowestRowByte = currentLowestRowByte;
//Check if sorting process is already finished. (No matches found from step 1).
if(lowestRowIndex == -1) {
stillSorting = false;
break;
}
//second step finds all the similar rows with the current lowestRowByte.
for(int row = 0; row < squareRoot; row += 1) {
if(data[(row * squareRoot) + startColumn] == currentLowestRowByte) {
lowestRowIndexes.Add(row);
}
}
//third step loops through all lowestRowIndexes to find which one comes first, second, third, etc...
if(lowestRowIndexes.Count > 1) {
//This sorts the same rows, rows*rows amount of times, until they are sorted correctly.
rowSwapped = true;
while(rowSwapped != false) {
rowSwapped = false;
for (int row = 0; row < lowestRowIndexes.Count; row++)
{
if((row+1) >= lowestRowIndexes.Count)
break;
//Current first row byte checked with Next first row byte in lowestRowIndexes.
//If both are equal keep going unto next column until a break is found, if any break.
startColumn = 1;
while(rowSwapped == false) {
//Reached beyond the last column.
if(startColumn == squareRoot)
break;
if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] == data[(lowestRowIndexes[row+1] * squareRoot) + startColumn])
startColumn++;
if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] < data[(lowestRowIndexes[row+1] * squareRoot) + startColumn]) {
break; //Sorted already, get out.
} else if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] > data[(lowestRowIndexes[row+1] * squareRoot) + startColumn]) {
swapBytesRow(ref data, lowestRowIndexes[row], lowestRowIndexes[row+1], squareRoot);
swapBytes(ref lexicoGraphicalIndexes, lowestRowIndexes[row], lowestRowIndexes[row+1]);
rowSwapped = true; //a swap has occurred.
break;
}
}
}
}
//forth step re-sorts all the pre-sorted lowestRowIndexes into master array, using startRow variable.
foreach(int row in lowestRowIndexes) {
//First checks if row is already in the proper sorted location.
if(row != startRow) {
swapBytesRow(ref data, startRow, row, squareRoot);
swapBytes(ref lexicoGraphicalIndexes, startRow, row);
startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
} else {
startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
}
}
} else {
//Only one instance of this lowestRowByte existed. so obviously this is the next best sorted match.
swapBytesRow(ref data, startRow, lowestRowIndex, squareRoot);
swapBytes(ref lexicoGraphicalIndexes, startRow, lowestRowIndex);
startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
}
}
return lexicoGraphicalIndexes;
}
我想要一个独立的东西,我知道我没有提到这是我的错。真的不喜欢像LinQ这样的新技术加上我测试后需要的额外文件,仍然无法工作无法理解
ThrowIfNull
,ThrowIfNonPositive
可能我还必须包含一些其他文件。。最后我自己写了这篇文章,花了我整整4个小时,但现在似乎写得更快了。。我将把它作为答案发布。如果数组长度不同,您的ArrayComparer
将无法正常工作。可能不会影响OP,但会阻止它成为通用比较器。代码的问题是使用了一种排序算法(类似于气泡排序),其复杂性为O(N*N)。你应该实现一个更好的排序算法,或者像我一样使用.Net的内置排序函数。我真的看不到其他方法。我对第一个字节相同的行进行排序,然后交换第一行、第二行等等。。就这样。在极少数情况下,主排序代码最多可排序5-7行。。只包含公共第一个字节的行只包含一次,即立即数
var list = data.Batch(N).Select(x => x.ToArray()).ToList();
list.Sort(new ArrayComparer());
public class ArrayComparer : IComparer<IEnumerable<byte>>
{
public int Compare(IEnumerable<byte> x, IEnumerable<byte> y)
{
var xenum = x.GetEnumerator();
var yenum = y.GetEnumerator();
while (xenum.MoveNext() && yenum.MoveNext())
{
if (xenum.Current != yenum.Current)
return xenum.Current - yenum.Current;
}
return 0;
}
}
int[] sortArrayOfArraysLexicoGraphically(ref byte[] data) {
int[] lexicoGraphicalIndexes;
long dataSize = data.Length;
int squareRoot;
bool rowSwapped;
squareRoot = (int)Math.Sqrt(dataSize);
lexicoGraphicalIndexes = new int[squareRoot];
for(int column = 0; column < lexicoGraphicalIndexes.Length; column++) {
lexicoGraphicalIndexes[column] = column;
}
byte currentLowestRowByte = 255; //set to highest to avoid unassigned local variable error.
int previousLowestRowByte = -1; //this is only used after the second pass.
int lowestRowIndex = -1; //hopefully this won't mess anything up.
List<int> lowestRowIndexes = new List<int>();
bool stillSorting = true;
int startRow = 0; //which row to start with, as the sorting process gets more sorted this number increases.
int startColumn = 0; //first column
while(stillSorting) {
//Resets
lowestRowIndexes.Clear();
startColumn = 0;
currentLowestRowByte = 255;
lowestRowIndex = -1;
//first step finds the lowest row in the first column
for(int row = 0; row < squareRoot; row += 1) {
if(data[(row * squareRoot) + startColumn] <= currentLowestRowByte &&
data[(row * squareRoot) + startColumn] > previousLowestRowByte) {
currentLowestRowByte = data[(row * squareRoot) + startColumn];
lowestRowIndex = row;
}
}
//Resets for next pass.
previousLowestRowByte = currentLowestRowByte;
//Check if sorting process is already finished. (No matches found from step 1).
if(lowestRowIndex == -1) {
stillSorting = false;
break;
}
//second step finds all the similar rows with the current lowestRowByte.
for(int row = 0; row < squareRoot; row += 1) {
if(data[(row * squareRoot) + startColumn] == currentLowestRowByte) {
lowestRowIndexes.Add(row);
}
}
//third step loops through all lowestRowIndexes to find which one comes first, second, third, etc...
if(lowestRowIndexes.Count > 1) {
//This sorts the same rows, rows*rows amount of times, until they are sorted correctly.
rowSwapped = true;
while(rowSwapped != false) {
rowSwapped = false;
for (int row = 0; row < lowestRowIndexes.Count; row++)
{
if((row+1) >= lowestRowIndexes.Count)
break;
//Current first row byte checked with Next first row byte in lowestRowIndexes.
//If both are equal keep going unto next column until a break is found, if any break.
startColumn = 1;
while(rowSwapped == false) {
//Reached beyond the last column.
if(startColumn == squareRoot)
break;
if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] == data[(lowestRowIndexes[row+1] * squareRoot) + startColumn])
startColumn++;
if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] < data[(lowestRowIndexes[row+1] * squareRoot) + startColumn]) {
break; //Sorted already, get out.
} else if(data[(lowestRowIndexes[row] * squareRoot) + startColumn] > data[(lowestRowIndexes[row+1] * squareRoot) + startColumn]) {
swapBytesRow(ref data, lowestRowIndexes[row], lowestRowIndexes[row+1], squareRoot);
swapBytes(ref lexicoGraphicalIndexes, lowestRowIndexes[row], lowestRowIndexes[row+1]);
rowSwapped = true; //a swap has occurred.
break;
}
}
}
}
//forth step re-sorts all the pre-sorted lowestRowIndexes into master array, using startRow variable.
foreach(int row in lowestRowIndexes) {
//First checks if row is already in the proper sorted location.
if(row != startRow) {
swapBytesRow(ref data, startRow, row, squareRoot);
swapBytes(ref lexicoGraphicalIndexes, startRow, row);
startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
} else {
startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
}
}
} else {
//Only one instance of this lowestRowByte existed. so obviously this is the next best sorted match.
swapBytesRow(ref data, startRow, lowestRowIndex, squareRoot);
swapBytes(ref lexicoGraphicalIndexes, startRow, lowestRowIndex);
startRow++; //skip Rows starting from value < startRow as they are perfectly sorted.
}
}
return lexicoGraphicalIndexes;
}
public void swapBytes(ref byte[] data, long firstIndex, long secondIndex) {
byte tmpFirstByte = data[firstIndex];
data[firstIndex] = data[secondIndex];
data[secondIndex] = tmpFirstByte;
}
public void swapBytes(ref int[] data, long firstIndex, long secondIndex) {
int tmpFirstByte = data[firstIndex];
data[firstIndex] = data[secondIndex];
data[secondIndex] = tmpFirstByte;
}
public void swapBytesRow(ref byte[] data, int firstRowIndex, int secondRowIndex, int rowSize) {
byte[] tmpFirstRowBytes = new byte[rowSize];
//Copies full row to tmpFirstRowBytes
Buffer.BlockCopy(data, (firstRowIndex * rowSize), tmpFirstRowBytes, 0, rowSize);
//Replace first row with second row.
Buffer.BlockCopy(data, (secondRowIndex * rowSize), data, (firstRowIndex * rowSize), rowSize);
//Replace second row with tmpFirstRowBytes
Buffer.BlockCopy(tmpFirstRowBytes, 0, data, (secondRowIndex * rowSize), rowSize);
}