C++ 在并行桶排序中使用递归基数排序

C++ 在并行桶排序中使用递归基数排序,c++,algorithm,sorting,recursion,radix-sort,C++,Algorithm,Sorting,Recursion,Radix Sort,我正在尝试编写一个快速算法,对大量整数向量进行排序,如: 15914597654 到目前为止,我的程序通过MSD将向量分解为小桶,如: bucket[1]:159 14 bucket[5]:5 54 bucket[6]:6 bucket[9]:97 现在我需要使用递归基数排序,以最高有效数字顺序对存储桶进行排序: bucket[1]:14 159 bucket[5]:5 54 bucket[6]:6 bucket[9]:97 这是我在网上找到的递归基数代码: // Sort 'size' n

我正在尝试编写一个快速算法,对大量整数向量进行排序,如:

159 14 5 97 6 54

到目前为止,我的程序通过MSD将向量分解为小桶,如:

bucket[1]:159 14
bucket[5]:5 54
bucket[6]:6
bucket[9]:97
现在我需要使用递归基数排序,以最高有效数字顺序对存储桶进行排序:

bucket[1]:14 159
bucket[5]:5 54
bucket[6]:6
bucket[9]:97
这是我在网上找到的递归基数代码:

// Sort 'size' non-negative integers starting at 'input' into ascending order,
// looking only at decimal digit positions 'digit' down to 0 (MSD-first radix sort).
// For the parameter 'digit', 0 denotes the least significant digit and increases as
// significance does, so the initial call passes the index of the highest digit that
// can occur (e.g. 2 for values up to 999). Digits above 'digit' are ignored, which is
// what lets the recursion sort a bucket whose higher digits are already equal.
// Precondition: every value is >= 0 (a negative value would give a negative bucket index).
void radixSort(int* input, int size, int digit){
  // Nothing to order: empty or single-element range, or all digit positions consumed.
  if (input == nullptr || size <= 1 || digit < 0)
    return;

  // 10^18 is the largest power of ten that fits in a signed 64-bit integer. Any digit
  // of an int above that position is zero, so clamping cannot change the result.
  if (digit > 18)
    digit = 18;
  long long divisor = 1;
  for (int i = 0; i < digit; ++i)
    divisor *= 10;

  auto digitOf = [divisor](int value) {
    return static_cast<int>((static_cast<long long>(value) / divisor) % 10);
  };

  // buckets[d] is the index where bucket d starts; buckets[10] is the end sentinel.
  // An empty bucket d therefore has buckets[d+1] == buckets[d].
  int buckets[11] = {0};    // assuming decimal numbers
  for (int i = 0; i < size; ++i)
    ++buckets[digitOf(input[i]) + 1];
  for (int d = 0; d < 10; ++d)
    buckets[d + 1] += buckets[d];

  // Permute the array in place: next[d] is the first slot of bucket d that has not
  // yet been confirmed to hold a value whose current digit is d.
  int next[10];
  for (int d = 0; d < 10; ++d)
    next[d] = buckets[d];

  for (int d = 0; d < 10; ++d)
  {
    while (next[d] < buckets[d + 1])
    {
      const int value = input[next[d]];
      const int target = digitOf(value);
      if (target == d)
      {
        ++next[d];
      }
      else
      {
        // Send the value to its own bucket and examine whatever was displaced.
        input[next[d]] = input[next[target]];
        input[next[target]++] = value;
      }
    }
  }

  // Each bucket now shares the current digit; order it by the next lower digit.
  for (int d = 0; d < 10; ++d)
  {
    radixSort(input + buckets[d], buckets[d + 1] - buckets[d], digit - 1);
  }
}
//根据第“digit”位数字,对从“input”开始的“size”个整数进行排序
//对于参数“digit”,0表示最低有效位,数值越大表示位越高
void radixSort(int* input, int size, int digit){
  if (size == 0)
    return;
  int[10] buckets;    //假设为十进制数
  //就地对数组进行排序,同时记录各个桶的起始索引。
  //如果buckets[i]应为空(没有在指定位上为i的数字),
  //则令buckets[i+1]=buckets[i]
  for (int i = 0; i < 10; ++i)
  {
    radixSort(input + buckets[i], buckets[i+1] - buckets[i], digit+1);
  }
}
我不知道如何在我的代码中实现这一点,我不确定bucket[]在上面的代码中做了什么。有人能解释一下我应该做什么改变吗?这是我正在使用的多线程代码,由于我没有使用递归,所以执行得不好

// Orders numbersToSort by the lexicographic order of the values' decimal
// representations (e.g. 14, 159, 5, 54, 6, 97) using numCores worker threads.
//
// numCores      - number of worker threads per stage; 0 is treated as 1.
// numbersToSort - the values to order.
//
// The work is done in three stages:
//   1. each thread splits its slice of the input into ten buckets keyed by the
//      most significant decimal digit;
//   2. the per-thread buckets are concatenated into ten big buckets;
//   3. every big bucket is sorted, then the buckets are appended in digit order.
//
// NOTE(review): numbersToSort is taken by value and nothing is returned, so the
// ordered result is discarded when the function returns. Exposing it requires a
// signature change (reference parameter or return value), which is left to the
// owner of the callers.
void sort(unsigned int numCores, std::vector<unsigned int> numbersToSort){
  // A thread count of zero would divide by zero below; fall back to one worker.
  if (numCores == 0) numCores = 1;

  const unsigned int total = static_cast<unsigned int>(numbersToSort.size());

// ******************Stage 1****************
// Use multiple threads to separate numbers into buckets using the most significant digit.
  // One slot per thread: each worker writes only to its own slot, so no lock is needed.
  std::vector<std::vector<std::vector<unsigned int>>> smallbuckets(
      numCores, std::vector<std::vector<unsigned int>>(10));

  unsigned int workload = total / numCores;

  auto put_small_buckets = [&numbersToSort, &smallbuckets]
  (unsigned int id, unsigned int start, unsigned int end) {
    std::vector<std::vector<unsigned int>>& buckets = smallbuckets[id];
    for (unsigned int i = start; i < end; ++i) {
      // Strip low digits until only the leading decimal digit remains.
      unsigned int leading = numbersToSort[i];
      while (leading >= 10) leading /= 10;
      buckets[leading].push_back(numbersToSort[i]);
    }
  };

  std::vector<std::thread> containerOfThreads;
  containerOfThreads.reserve(numCores);

  for (unsigned int i = 0; i < numCores; ++i) {
    const unsigned int start = workload * i;
    // The last thread also takes the remainder left by the integer division.
    const unsigned int end = (i == numCores - 1) ? total : workload * (i + 1);
    containerOfThreads.push_back(std::thread(put_small_buckets, i, start, end));
  }

  for (std::thread& t : containerOfThreads) t.join();

  numbersToSort.clear();

// ******************Stage 2****************
// Merge the per-thread buckets into one bucket per leading digit (radix 10).
  std::vector<std::vector<unsigned int>> bigbuckets(10);

  std::mutex mutex;
  int current_index = 10;  // next bucket to claim is current_index - 1

  auto collect = [&smallbuckets, &bigbuckets, &mutex, &current_index] () {
    for (;;) {
      int index;
      {
        // Claim the next unprocessed bucket; the counter is the only shared state.
        std::lock_guard<std::mutex> lock(mutex);
        index = --current_index;
      }
      if (index < 0) return;
      std::vector<unsigned int>& mybucket = bigbuckets[index];
      for (const std::vector<std::vector<unsigned int>>& perThread : smallbuckets) {
        mybucket.insert(mybucket.end(), perThread[index].begin(), perThread[index].end());
      }
    }
  };

  containerOfThreads.clear();
  for (unsigned int i = 0; i < numCores; ++i) {
    containerOfThreads.push_back(std::thread(collect));
  }
  for (std::thread& t : containerOfThreads) t.join();

// ******************Stage 3****************
// Sort big buckets; each thread owns the contiguous bucket range [start, end).
  auto sort_big_buckets = [&bigbuckets] (unsigned int start, unsigned int end) {
    for (unsigned int curr = start; curr < end; ++curr) {
      std::vector<unsigned int>& mybucket = bigbuckets[curr];
      std::sort(mybucket.begin(), mybucket.end(), [](const unsigned int& x, const unsigned int& y){
        // Compare by decimal text so that e.g. 159 orders before 19.
        const std::string tmp1 = std::to_string(x);
        const std::string tmp2 = std::to_string(y);
        return std::lexicographical_compare(tmp1.begin(), tmp1.end(), tmp2.begin(), tmp2.end());
      });
    }
  };

  containerOfThreads.clear();

  // With more than ten threads the quotient is 0 and the last thread sorts every bucket.
  workload = 10 / numCores;
  for (unsigned int i = 0; i < numCores; ++i) {
    const unsigned int start = workload * i;
    const unsigned int end = (i == numCores - 1) ? 10 : workload * (i + 1);
    containerOfThreads.push_back(std::thread(sort_big_buckets, start, end));
  }
  for (std::thread& t : containerOfThreads) t.join();

// put all elements back to numbersToSort
  for (const std::vector<unsigned int>& bucket : bigbuckets) {
    numbersToSort.insert(numbersToSort.end(), bucket.begin(), bucket.end());
  }
}
void排序(unsigned int numCores,std::vector numbersToSort){
//**********************第一阶段****************
//使用多线程使用最高有效位将数字分隔为存储桶
auto smallbucket=std::vector();
std::互斥互斥;
unsigned int workload=numbersToSort.size()/numCores;
std::函数put_小_桶;
put_small_bucket=[此、smallbucket和互斥]
(无符号整数id、无符号整数开始、无符号整数结束){
auto bucket=std::make_shared(std::vector());
对于(int j=0;j<10;++j){
bucket->push_back(std::vector());
}
for(无符号整数i=start;inumbersToSort.size();
containerOfThreads.push_back(std::make_shared(put_small_bucket,i,start,end));
}
//把所有的线重新连接在一起。
对于(auto t:containerOfThreads)t->join();
numbersToSort.clear();
//**********************第2阶段****************
//将小的多线程存储桶放回基数(10)的存储桶
auto BigBucket=std::vector();
对于(int j=0;j<10;++j){
bucket.push_back(std::make_shared(std::vector());
}
int当前_指数=10;
std::函数收集;
collect=[this,&smallbuckets,&current_index,&mutex,&collect,&bigbuckets](){
mutex.lock();
int index=--当前_索引;
mutex.unlock();
如果(指数<0)返回;
自动mybucket=BigBucket[索引];
对于(自动i=smallbucket.begin();i!=smallbucket.end();++i){
mybucket->insert(mybucket->end(),(*(*i))[index].begin(),(*(*i))[index].end();
}
收集();
};
//创建一个线程容器
containerOfThreads.clear();
//创建线程并将其添加到容器中。
for(无符号整数i=0;ijoin();
//**********************第3阶段****************
//分类大水桶
对于(int j=0;j<10;++j){
bucket.push_back(std::make_shared(std::vector());
}
std::函数排序\u大\u桶;
sort_big_bucket=[这个,&bigbackes,&mutex]
(无符号整数开始,无符号整数结束){
无符号整数电流=开始;
而(当前<结束){
自动mybucket=BigBucket[curr];
排序(mybucket->begin(),mybucket->end(),[])(常量无符号整数和x,常量无符号整数和y){
std::string tmp1=std::to_string(x);
std::string tmp2=std::to_string(y);
返回字典形式的比较(tmp1.begin(),tmp1.end(),tmp2.begin(),tmp2.end());
//返回alesb(x,y,0);
} );
++咖喱;
}
};
//创建一个线程容器
containerOfThreads.clear();
工作量=10/个月;
//创建线程并将其添加到容器中。
for(无符号整数i=0;ijoin();
//将所有元素放回numbersToSort
for(自动i=bigbackes.begin();i!=bigbackes.end();++i){
插入(numbersToSort.end(),(*i)->begin(),(*i)->end());
}
}
我不知道如何在我的代码中实现这一点,我不确定bucket[]在上面的代码中做了什么。有人能解释一下我应该做什么改变吗

老实说,buckets[] 数组并不是必需的。它的作用是保存每个桶的起始索引,但由于各个桶随后会按同样的顺序逐个处理,因此用几个额外的变量就可以代替这个数组

正如我所说,您应该先把数字转换为字符串,然后对字符串进行排序。这样,每次分桶时只需检查一个字符,而不必在每次比较时都执行“创建字符串->比较->销毁字符串”的全部操作。最后,您需要把字符串转换回数字

您询问的代码部分可以如下所示:

// Sorts the strings in [begin, end) into lexicographic order with an in-place
// MSD radix sort, examining characters from index 'digit' onward.
//
// begin, end - the range to sort; its strings are expected to consist of the
//              characters '0'..'9' and to agree on their first 'digit' characters
//              (trivially true for the initial call with digit == 0).
// digit      - index of the character used to bucket the strings at this level.
//
// Strings that have a character outside '0'..'9' at position 'digit' are left
// after all bucketed strings, in unspecified order.
void radixSort(std::vector<std::string>::iterator begin, std::vector<std::string>::iterator end, int digit){
    // Ranges of fewer than two strings are already sorted; a negative index
    // selects no character, so there is nothing to bucket by.
    if (end - begin < 2 || digit < 0)
        return;

    const std::string::size_type pos = static_cast<std::string::size_type>(digit);

    // First move the strings that end before 'pos' to the front: they are
    // prefixes of everything else in the range and therefore order first.
    auto e = begin;
    for (auto p = begin; p != end; ++p)
        if (p->size() <= pos)
        {
            if (p != e)
                std::swap(*p, *e);
            ++e;
        }
    if (e == end)
        return;

    // Grow one bucket per digit character, in ascending order, directly behind
    // the previous one; 'e' always marks the end of the already-placed prefix.
    for (char d = '0'; d <= '9'; ++d)
    {
        const auto s = e;
        for (auto p = e; p != end; ++p)
            if ((*p)[pos] == d)
            {
                if (p != e)
                    std::swap(*p, *e);
                ++e;
            }
        // [s, e) now shares the character d at 'pos'; order it by the next character.
        radixSort(s, e, digit+1);
    }
}
radixSort(v.begin(), v.end(), 0);
与你的