Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/138.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/multithreading/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ C++;并行就地基数排序_C++_Multithreading_Sorting_C++11_Radix Sort - Fatal编程技术网

C++ C++;并行就地基数排序

C++ C++;并行就地基数排序,c++,multithreading,sorting,c++11,radix-sort,C++,Multithreading,Sorting,C++11,Radix Sort,我正在尝试用基数256实现并行就地基数排序。在我看来,函数srt在单线程中运行良好。但当更多线程用于随机数据时,有时会出现错误:“访问冲突读取位置”后跟函数srt的“marker[index]”地址。它在函数srt的第15行中断,即“tmp=marker[index]”,其中index的值为63。有人能解释发生了什么事吗 inline void count(unsigned* list, int size, int* histogram) { for (int i = 0; i <

我正在尝试用基数256实现并行就地基数排序。在我看来,函数srt在单线程中运行良好。但当更多线程用于随机数据时,有时会出现错误:“访问冲突读取位置”后跟函数srt的“marker[index]”地址。它在函数srt的第15行中断,即“tmp=marker[index]”,其中index的值为63。有人能解释发生了什么事吗

// Accumulate a histogram of (value >> 24) over the first `size` entries of
// `list`. The histogram is added to, not cleared, so the caller must zero it
// beforehand if a fresh count is wanted.
inline
void count(unsigned* list, int size, int* histogram) {
    const unsigned* cursor = list;
    for (int remaining = size; remaining > 0; --remaining, ++cursor) {
        histogram[*cursor >> 24] += 1;
    }
}
// In-place redistribution of values into 256 buckets keyed by (value >> 24).
//   histogram : histogram[d] is used as the number of slots to settle in
//               bucket d.
//   marker    : marker[d] points at the next slot to fill in bucket d; the
//               entries are advanced as values are placed.
// NOTE(review): nothing in this function bounds marker[index]. It looks like
// correctness relies on the slots reachable through `marker` holding exactly
// histogram[d] values with digit d for every d; if the caller cannot guarantee
// that, marker[index] can run past its region — confirm against the caller.
void srt(int* histogram, unsigned** marker) {
    static const int bin_size = 256;
    int left = histogram[0];
    int index;
    int tmp;
    // Bucket 0: settle histogram[0] slots, one per outer iteration.
    while (left-- > 0) {
        index = *marker[0] >> 24;
        // Keep swapping the value in the current slot into its own bucket
        // until a value belonging to bucket 0 lands here.
        while (index != 0) {
            tmp = *marker[index];
            *marker[index]++ = *marker[0];
            *marker[0] = tmp;
            index = *marker[0] >> 24;
        }
        ++marker[0];
    }
    for (int k = 1; k < bin_size; ++k) {
        // Slots of bucket k still to settle: its count minus the distance
        // marker[k] - marker[k - 1].
        // NOTE(review): this subtraction presumably assumes bucket k starts
        // exactly where bucket k-1 ends; verify for callers whose marker
        // arrays are not laid out contiguously.
        left = histogram[k] - (marker[k] - marker[k - 1]);
        while (left-- > 0) {
            index = *marker[k] >> 24;
            // Same swap-until-home loop as above, for bucket k.
            while (index != k) {
                tmp = *marker[index];
                *marker[index]++ = *marker[k];
                *marker[k] = tmp;
                index = *marker[k] >> 24;
            }
            ++marker[k];
        }
    }
}
// Runs one bucket pass on (value >> 24) over list[0..size) using four
// workers: three spawned threads plus the calling thread.
void parallel_sort(unsigned* list, int size) {
    // Build one histogram per quarter of the list; the last quarter also
    // takes the size % 4 leftover elements.
    static const int bin_size = 256;
    int histogram[bin_size] = { 0 };
    int histogram1[bin_size] = { 0 };
    int histogram2[bin_size] = { 0 };
    int histogram3[bin_size] = { 0 };
    const int partial_size = size / 4;
    count(list, partial_size, histogram);
    count(&list[partial_size], partial_size, histogram1);
    count(&list[2 * partial_size], partial_size, histogram2);
    count(&list[3 * partial_size], partial_size + (size % 4), histogram3);
    unsigned int* marker[bin_size];
    unsigned int* marker1[bin_size];
    unsigned int* marker2[bin_size];
    unsigned int* marker3[bin_size];
    unsigned int* previous = list;
    // For each bucket i, lay out four consecutive sub-regions (one per
    // worker), sized by that worker's histogram entry for i.
    for ( int i = 0; i < bin_size; ++i ) {
        marker[i] = previous;
        marker1[i] = marker[i] + histogram[i];
        marker2[i] = marker1[i] + histogram1[i];
        marker3[i] = marker2[i] + histogram2[i];
        previous = marker3[i] + histogram3[i];
    }
    // Reported to break inside srt in any of these threads.
    // NOTE(review): each histogram was counted over a quarter of the list,
    // but each worker's srt reads the values found in its marker sub-regions,
    // which are not that quarter. The digit mix there presumably does not
    // match the worker's histogram, which would let a marker run past its
    // sub-region (into another worker's, or past the end of list) — confirm.
    thread t21(srt, histogram1, marker1);
    thread t22(srt, histogram2, marker2);
    thread t23(srt, histogram3, marker3);
    srt(histogram, marker);
    t21.join();
    t22.join();
    t23.join();
//TODO
}
// Test driver: fills a 100000-element array with pseudo-random values and
// runs parallel_sort on it.
int main() {
    const int size = 100000;
    unsigned list[size];
    srand(time(NULL));
    for (int i = 0;i < size;++i) {
        // Multiply as unsigned: rand() returns int, and the signed product
        // overflows (undefined behaviour) when RAND_MAX is large, e.g.
        // 2^31 - 1 with a 32-bit int. Unsigned arithmetic wraps instead.
        list[i] = (unsigned)rand() * (unsigned)rand();
    }
    parallel_sort(list, size);
}
问题在于:

// Test driver: fills a 100000-element array with pseudo-random values and
// runs parallel_sort on it.
int main() {
    const int size = 100000;
    unsigned list[size];
    srand(time(NULL));
    for (int i = 0;i < size;++i) {
        // Multiply as unsigned: rand() returns int, and the signed product
        // overflows (undefined behaviour) when RAND_MAX is large, e.g.
        // 2^31 - 1 with a 32-bit int. Unsigned arithmetic wraps instead.
        list[i] = (unsigned)rand() * (unsigned)rand();
    }
    parallel_sort(list, size);
}
intmain(){
常数int size=100000;
未签名列表[大小];
srand(时间(空));
对于(int i=0;i
那么,如果 list[i] = rand()*rand() 得到的数字大于 size(即 100000)会怎样?您的 marker 数组中的指针必然会超出 list 的边界,这就是程序崩溃的原因。


总而言之,由于您使用的是基数排序,您应该确保所有要排序的值都在数组最大大小之下。

请尝试下面的并行基数代码:

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/time.h>

// Process exit codes returned by main on failure. Numbering starts at 1
// because exit status 0 means success; with the default numbering a usage
// error (INVALID_USAGE) would have been reported to the shell as success.
enum errors {
  INVALID_USAGE = 1,      // fewer than two file arguments were given
  ERROR_OPENING_INPUT,    // the input file could not be opened
  ERROR_OPENING_OUTPUT,   // the output file could not be opened
  INVALID_FORMAT,         // the input file content could not be parsed
  MEMORY_ERROR,           // an allocation failed
  THREAD_CREATE_ERROR,
  THREAD_JOIN_ERROR
};

// Number of significant bytes per key; concurrent_radix runs one pass per
// byte position, from KEY_LENGTH - 1 down to 0.
const unsigned char KEY_LENGTH = 7;
// Bytes stored per key record. main reads CHARKEY_LENGTH bytes for each key
// and then overwrites the byte at index KEY_LENGTH with '\0'.
const unsigned char CHARKEY_LENGTH = KEY_LENGTH + 1;
// Size of every counter/offset table; the tables are indexed by an
// unsigned char taken from a key.
const unsigned short INPUT_SPACE = 256;

// Per-worker argument block handed to concurrent_radix.
struct thread_info
{
  unsigned char **input;        // key pointers read during the first pass
  unsigned char **output;       // key pointers written during the first pass
  unsigned short thread_idx;    // index of this worker
  unsigned int first_idx;       // first key index handled by this worker
  unsigned int last_idx;        // one past the last key index handled
  pthread_barrier_t *barrier;   // barrier the workers wait on between phases
  unsigned int *local_counters; // flattened table: worker t's INPUT_SPACE counters start at t * INPUT_SPACE
  unsigned int thread_count;    // number of workers sharing local_counters
};

// Byte offset of key number `line` inside the flat key table, where each key
// occupies CHARKEY_LENGTH consecutive bytes.
inline unsigned int coordinate(const unsigned int line)
{
  const unsigned int byte_offset = line * CHARKEY_LENGTH;
  return byte_offset;
}

// Fill output[0..size) with pointers into the flat byte array `input`:
// output[i] addresses the first byte of key i, as located by coordinate(i).
inline void map_strings(unsigned char *const input, unsigned char **output,
                        const unsigned int size)
{
  unsigned int key = 0;
  while (key < size) {
    output[key] = input + coordinate(key);
    ++key;
  }
}

// Number of processors currently online, clamped to the range 1..0xFFFF.
// sysconf returns a long and reports failure as -1; casting that straight to
// unsigned short would yield 65535 (or 0 for a zero result), which callers
// use as a worker count and array size.
inline unsigned short core_count()
{
  // Linux, Solaris, AIX, etc:
  const long online = sysconf(_SC_NPROCESSORS_ONLN);
  if (online < 1) {
    return 1;
  }
  if (online > 0xFFFF) {
    return 0xFFFF;
  }
  return (unsigned short)online;
}

// Write the one-line command-line synopsis for `prog_name` to stderr.
void print_usage(const char *const prog_name)
{
  fprintf(stderr, "Usage: %s input_file ouput_file\n", prog_name);
}

/*
  Build the byte histogram for one radix position.
  input:     array of key pointers
  first_idx: first key index to examine
  last_idx:  one past the last key index to examine
  radix:     byte position inside each key
  counter:   table of INPUT_SPACE entries; cleared, then counter[b] receives
             the number of examined keys whose byte at `radix` equals b
 */
inline void count_char(const unsigned char *const *const input,
                       const unsigned int first_idx,
                       const unsigned int last_idx,
                       const unsigned int radix,
                       unsigned int *counter)
{
  for (unsigned int slot = 0; slot < INPUT_SPACE; ++slot) {
    counter[slot] = 0;
  }
  unsigned int row = first_idx;
  while (row < last_idx) {
    const unsigned char *const key = input[row];
    counter[key[radix]] += 1;
    ++row;
  }
}

/* Compute, for one thread, the first output slot of every byte value.
   local_counters: flattened table of all counters; thread t's count for
                   byte value i is local_counters[t*INPUT_SPACE + i]
   thread_idx:     index of the thread the offsets are computed for
   thread_count:   number of threads contributing counters
   offset:         table of INPUT_SPACE entries, fully populated on return;
                   offset[i] = (keys with a byte value below i, all threads)
                             + (keys with byte value i, threads below
                                thread_idx)
   Every entry is written, including offset[0]: the previous version started
   its final loop at 1 and left offset[0] uninitialised.
*/
inline void compute_offset(const unsigned int *const local_counters,
                           const unsigned int thread_idx,
                           const unsigned int thread_count,
                           unsigned int *const offset){
  unsigned int bucket_start = 0;  // total count of all smaller byte values
  for(unsigned int i = 0; i < INPUT_SPACE; ++i){
    unsigned int before_me = 0;     // byte value i, threads below thread_idx
    unsigned int bucket_total = 0;  // byte value i, all threads
    for(unsigned int thread_i = 0; thread_i < thread_count; ++thread_i){
      const unsigned int value = local_counters[thread_i*INPUT_SPACE + i];
      if (thread_i < thread_idx){
        before_me += value;
      }
      bucket_total += value;
    }
    offset[i] = bucket_start + before_me;
    bucket_start += bucket_total;
  }
}

// Scatter the key pointers input[first_idx..last_idx) into `output`.
// Each key goes to slot offset[b], where b is its byte at position `radix`,
// and offset[b] is then advanced by one so the next key with the same byte
// lands in the following slot.
inline void sort_input(unsigned char *const *const input,
                       unsigned int *const offset,
                       const unsigned int first_idx,
                       const unsigned int last_idx,
                       const unsigned int radix,
                       unsigned char **const output){
  unsigned int row = first_idx;
  while (row < last_idx) {
    unsigned char *const key = input[row];
    unsigned int *const next_slot = &offset[key[radix]];
    output[*next_slot] = key;
    *next_slot += 1;
    ++row;
  }
}

// Worker body: runs one counting/scatter pass per key byte position, from
// KEY_LENGTH - 1 down to 0, over this worker's slice of the key array.
// arg is a thread_info*; the return value is always NULL.
void *concurrent_radix(void *arg)
{
  thread_info *const info = (thread_info*)arg;
  const unsigned short thread_idx = info->thread_idx;
  const unsigned int first_idx = info->first_idx;
  const unsigned int last_idx = info->last_idx;
  const unsigned int thread_count = info->thread_count;
  unsigned int *const local_counters = info->local_counters;
  unsigned char **input = info->input;
  unsigned char **output = info->output;
  pthread_barrier_t *const barrier = info->barrier;
  // This worker's own INPUT_SPACE-entry slice of the shared counter table.
  unsigned int *const counter = &(local_counters[thread_idx * 
INPUT_SPACE]);
  int radix = KEY_LENGTH - 1;
  do {
    count_char(input, first_idx, last_idx, radix, counter);

    // compute_offset reads every worker's counters, so all of them must be
    // complete before any worker continues.
    pthread_barrier_wait(barrier);

    unsigned int offset[INPUT_SPACE];
    compute_offset(local_counters, thread_idx, thread_count, offset);
    sort_input(input, offset, first_idx, last_idx, radix, output);

    // Wait until every worker has finished scattering before the buffers
    // swap roles and the counters are overwritten by the next pass.
    pthread_barrier_wait(barrier);

    // The array just written becomes the source of the next pass. With
    // KEY_LENGTH = 7 (an odd number of passes) the last pass writes into the
    // array that was originally info->output.
    unsigned char **const temp = input;
    input = output;
    output = temp;
    --radix;
  } while(radix >= 0);
  return NULL;
}

// sort the radix index
inline void threaded_radix (unsigned char **input, unsigned char 
**output, const unsigned int nb_keys)
{
  const unsigned short nb_core = core_count();

  pthread_t threads[nb_core];
  thread_info threads_arg[nb_core];
  unsigned int local_counters[nb_core * INPUT_SPACE];

  pthread_barrier_t barrier;
  pthread_barrier_init(&barrier, NULL, nb_core);

  const unsigned int range = nb_keys / nb_core;
  unsigned int last_idx = 0;
  for (unsigned short i = 0; i < nb_core; ++i){
    const unsigned int first_idx = last_idx;
    last_idx = last_idx + range;

thread_info &info = threads_arg[i];
info.input = input;
info.output = output;
info.first_idx = first_idx;
info.last_idx = last_idx;
info.thread_idx = i;
info.thread_count = nb_core;
info.local_counters = local_counters;
info.barrier = &barrier;
  }
  threads_arg[nb_core-1].last_idx = nb_keys;

  for (unsigned short i = 1; i < nb_core; ++i){
    pthread_create(&threads[i], NULL, concurrent_radix, (void *)&
(threads_arg[i]));
  }
  concurrent_radix((void *)&(threads_arg[0]));

  for (unsigned short i = 1; i < nb_core; ++i){
    pthread_join(threads[i], NULL);
  }
  pthread_barrier_destroy(&barrier);
}

inline void radix_sort(unsigned char *input, unsigned char **output, 
const unsigned int nb_keys)
{
  unsigned char **buffer = (unsigned char **)malloc(nb_keys * 
sizeof(unsigned char*));
  map_strings(input, buffer, nb_keys);
  threaded_radix(buffer, output, nb_keys);

  free(buffer);
}

// Entry point: main(argc, argv) with argv[1] = input file, argv[2] = output
// file.
// The input file starts with a decimal key count, followed by that many
// CHARKEY_LENGTH-byte records. The keys are sorted with radix_sort, the
// elapsed time is printed to stdout, and the count followed by one key per
// line is written to the output file.
// Returns 0 on success, otherwise one of the `errors` codes.
int main(const int argc, const char *const argv[])
{
  if(argc < 3) {
    print_usage(argv[0]);
    return INVALID_USAGE;
  }
  // import the data in a table
  FILE *input = fopen(argv[1], "r");
  if(!input){
    const int errsv = errno;
    fprintf(stderr, "%s: %s\n", argv[1], strerror(errsv));
    print_usage(argv[0]);
    return ERROR_OPENING_INPUT;
  }

  unsigned int input_size;
  {
    // input_size is unsigned, so it is scanned with %u rather than %d.
    int converted = fscanf(input, "%u\n", &input_size);
    if(converted != 1){
      fprintf(stderr, "Invalid file format");
      fclose(input);
      return INVALID_FORMAT;
    }
  }

  // Multiply in size_t so a large key count cannot wrap in unsigned int.
  unsigned char *input_table =
      (unsigned char *)malloc((size_t)input_size * CHARKEY_LENGTH);
  unsigned char **output_table =
      (unsigned char **)malloc((size_t)input_size * sizeof(unsigned char*));
  if(!input_table || !output_table){
    fprintf(stderr, "Error: not enough memory\n");
    free(input_table);
    free(output_table);
    fclose(input);
    return MEMORY_ERROR;
  }

  for(unsigned int i = 0; i<input_size; ++i){
    unsigned char * key = &(input_table[coordinate(i)]);
    size_t size_read = fread(key, 1, CHARKEY_LENGTH, input);
    // The last byte of each record is replaced by the string terminator.
    key[KEY_LENGTH] = '\0';
    if(size_read != CHARKEY_LENGTH){
      if(feof(input)){
        // A short read is only tolerated on the very last key.
        assert(i == (input_size - 1));
      } else {
        fprintf(stderr, "Invalid file format");
        free(input_table);
        free(output_table);
        fclose(input);
        return INVALID_FORMAT;
      }
    }
  }
  fclose(input);

  // sort
  struct timeval tick1, tick2;
  gettimeofday(&tick1, NULL);

  radix_sort(input_table, output_table, input_size);

  gettimeofday(&tick2, NULL);
  const double ellapsed = (tick2.tv_sec + tick2.tv_usec/1000000.0) -
                          (tick1.tv_sec + tick1.tv_usec/1000000.0);
  printf("time for action = %g seconds\n", ellapsed);

  // write the output
  FILE *output = fopen(argv[2], "w+");
  // Check the stream that was just opened; the previous code tested `input`
  // here, so a failed fopen went unnoticed.
  if(!output){
    const int errsv = errno;
    fprintf(stderr, "%s: %s\n", argv[2], strerror(errsv));
    print_usage(argv[0]);
    free(input_table);
    free(output_table);
    return ERROR_OPENING_OUTPUT;
  }

  fprintf(output, "%u\n", input_size);
  for(unsigned int i = 0; i < input_size;++i){
    fprintf(output, "%s\n", (const char *)output_table[i]);
  }
  fclose(output);

  free(input_table);
  free(output_table);

  return 0;
}
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
枚举错误{
无效的_用法,
打开输入时出错,
打开输出时出错,
无效的\u格式,
内存错误,
线程\u创建\u错误,
线程连接错误
};
const unsigned char KEY_LENGTH=7;
const unsigned char CHARKEY_LENGTH=KEY_LENGTH+1;
常量无符号短输入_空间=256;
结构线程信息
{
无符号字符**输入;
无符号字符**输出;
无符号短线程_idx;
无符号int-first_-idx;
无符号int last_idx;
pthread_barrier_t*barrier;
无符号int*本地_计数器;//整数数组的数组
无符号整数线程计数;
};
内联无符号整数坐标(常量无符号整数线)
{
返回(行*字符长度);
}
//将内存数组输入的每个字符串映射到输出中的指针
内联无效映射_字符串(无符号字符*常量输入,无符号字符
**输出,常量无符号整数大小)
{
for(无符号整数i=0;ithread\u idx;
const unsigned int first\u idx=info->first\u idx;
const unsigned int last_idx=info->last_idx;
const unsigned int thread\u count=info->thread\u count;
unsigned int*const local_counters=info->local_counters;
无符号字符**输入=信息->输入;
无符号字符**输出=信息->输出;
pthread\u barrier\u t*const barrier=info->barrier;
无符号整数*常量计数器=&(本地计数器[线程idx*
输入空间];
int基数=键长度-1;
做{
count_char(输入、第一个_idx、最后一个_idx、基数、计数器);
pthread_barrier_wait(barrier);
无符号整数偏移量[输入空间];
计算线程偏移量(本地线程计数器、线程idx、线程计数、偏移量);
排序输入(输入、偏移、第一个idx、最后一个idx、基数、输出);
pthread_barrier_wait(barrier);
无符号字符**常量温度=输入;
输入=输出;
输出=温度;
--根;
}而(基数>=0);
返回NULL;
}
//对基数索引进行排序
内联无效线程基数(无符号字符**输入,无符号字符
**输出,常量无符号整数(nb_键)
{
const unsigned short nb_core=core_count();
pthread_t threads[nb_core];
线程信息线程参数[nb_核心];
无符号整数本地_计数器[nb_核心*输入_空间];
pthread_barr_barr
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/time.h>

// Process exit codes returned by main on failure. Numbering starts at 1
// because exit status 0 means success; with the default numbering a usage
// error (INVALID_USAGE) would have been reported to the shell as success.
enum errors {
  INVALID_USAGE = 1,      // fewer than two file arguments were given
  ERROR_OPENING_INPUT,    // the input file could not be opened
  ERROR_OPENING_OUTPUT,   // the output file could not be opened
  INVALID_FORMAT,         // the input file content could not be parsed
  MEMORY_ERROR,           // an allocation failed
  THREAD_CREATE_ERROR,
  THREAD_JOIN_ERROR
};

// Number of significant bytes per key; concurrent_radix runs one pass per
// byte position, from KEY_LENGTH - 1 down to 0.
const unsigned char KEY_LENGTH = 7;
// Bytes stored per key record. main reads CHARKEY_LENGTH bytes for each key
// and then overwrites the byte at index KEY_LENGTH with '\0'.
const unsigned char CHARKEY_LENGTH = KEY_LENGTH + 1;
// Size of every counter/offset table; the tables are indexed by an
// unsigned char taken from a key.
const unsigned short INPUT_SPACE = 256;

// Per-worker argument block handed to concurrent_radix.
struct thread_info
{
  unsigned char **input;        // key pointers read during the first pass
  unsigned char **output;       // key pointers written during the first pass
  unsigned short thread_idx;    // index of this worker
  unsigned int first_idx;       // first key index handled by this worker
  unsigned int last_idx;        // one past the last key index handled
  pthread_barrier_t *barrier;   // barrier the workers wait on between phases
  unsigned int *local_counters; // flattened table: worker t's INPUT_SPACE counters start at t * INPUT_SPACE
  unsigned int thread_count;    // number of workers sharing local_counters
};

// Byte offset of key number `line` inside the flat key table, where each key
// occupies CHARKEY_LENGTH consecutive bytes.
inline unsigned int coordinate(const unsigned int line)
{
  const unsigned int byte_offset = line * CHARKEY_LENGTH;
  return byte_offset;
}

// Fill output[0..size) with pointers into the flat byte array `input`:
// output[i] addresses the first byte of key i, as located by coordinate(i).
inline void map_strings(unsigned char *const input, unsigned char **output,
                        const unsigned int size)
{
  unsigned int key = 0;
  while (key < size) {
    output[key] = input + coordinate(key);
    ++key;
  }
}

// Number of processors currently online, clamped to the range 1..0xFFFF.
// sysconf returns a long and reports failure as -1; casting that straight to
// unsigned short would yield 65535 (or 0 for a zero result), which callers
// use as a worker count and array size.
inline unsigned short core_count()
{
  // Linux, Solaris, AIX, etc:
  const long online = sysconf(_SC_NPROCESSORS_ONLN);
  if (online < 1) {
    return 1;
  }
  if (online > 0xFFFF) {
    return 0xFFFF;
  }
  return (unsigned short)online;
}

// Write the one-line command-line synopsis for `prog_name` to stderr.
void print_usage(const char *const prog_name)
{
  fprintf(stderr, "Usage: %s input_file ouput_file\n", prog_name);
}

/*
  Build the byte histogram for one radix position.
  input:     array of key pointers
  first_idx: first key index to examine
  last_idx:  one past the last key index to examine
  radix:     byte position inside each key
  counter:   table of INPUT_SPACE entries; cleared, then counter[b] receives
             the number of examined keys whose byte at `radix` equals b
 */
inline void count_char(const unsigned char *const *const input,
                       const unsigned int first_idx,
                       const unsigned int last_idx,
                       const unsigned int radix,
                       unsigned int *counter)
{
  for (unsigned int slot = 0; slot < INPUT_SPACE; ++slot) {
    counter[slot] = 0;
  }
  unsigned int row = first_idx;
  while (row < last_idx) {
    const unsigned char *const key = input[row];
    counter[key[radix]] += 1;
    ++row;
  }
}

/* Compute, for one thread, the first output slot of every byte value.
   local_counters: flattened table of all counters; thread t's count for
                   byte value i is local_counters[t*INPUT_SPACE + i]
   thread_idx:     index of the thread the offsets are computed for
   thread_count:   number of threads contributing counters
   offset:         table of INPUT_SPACE entries, fully populated on return;
                   offset[i] = (keys with a byte value below i, all threads)
                             + (keys with byte value i, threads below
                                thread_idx)
   Every entry is written, including offset[0]: the previous version started
   its final loop at 1 and left offset[0] uninitialised.
*/
inline void compute_offset(const unsigned int *const local_counters,
                           const unsigned int thread_idx,
                           const unsigned int thread_count,
                           unsigned int *const offset){
  unsigned int bucket_start = 0;  // total count of all smaller byte values
  for(unsigned int i = 0; i < INPUT_SPACE; ++i){
    unsigned int before_me = 0;     // byte value i, threads below thread_idx
    unsigned int bucket_total = 0;  // byte value i, all threads
    for(unsigned int thread_i = 0; thread_i < thread_count; ++thread_i){
      const unsigned int value = local_counters[thread_i*INPUT_SPACE + i];
      if (thread_i < thread_idx){
        before_me += value;
      }
      bucket_total += value;
    }
    offset[i] = bucket_start + before_me;
    bucket_start += bucket_total;
  }
}

// Scatter the key pointers input[first_idx..last_idx) into `output`.
// Each key goes to slot offset[b], where b is its byte at position `radix`,
// and offset[b] is then advanced by one so the next key with the same byte
// lands in the following slot.
inline void sort_input(unsigned char *const *const input,
                       unsigned int *const offset,
                       const unsigned int first_idx,
                       const unsigned int last_idx,
                       const unsigned int radix,
                       unsigned char **const output){
  unsigned int row = first_idx;
  while (row < last_idx) {
    unsigned char *const key = input[row];
    unsigned int *const next_slot = &offset[key[radix]];
    output[*next_slot] = key;
    *next_slot += 1;
    ++row;
  }
}

// Worker body: runs one counting/scatter pass per key byte position, from
// KEY_LENGTH - 1 down to 0, over this worker's slice of the key array.
// arg is a thread_info*; the return value is always NULL.
void *concurrent_radix(void *arg)
{
  thread_info *const info = (thread_info*)arg;
  const unsigned short thread_idx = info->thread_idx;
  const unsigned int first_idx = info->first_idx;
  const unsigned int last_idx = info->last_idx;
  const unsigned int thread_count = info->thread_count;
  unsigned int *const local_counters = info->local_counters;
  unsigned char **input = info->input;
  unsigned char **output = info->output;
  pthread_barrier_t *const barrier = info->barrier;
  // This worker's own INPUT_SPACE-entry slice of the shared counter table.
  unsigned int *const counter = &(local_counters[thread_idx * 
INPUT_SPACE]);
  int radix = KEY_LENGTH - 1;
  do {
    count_char(input, first_idx, last_idx, radix, counter);

    // compute_offset reads every worker's counters, so all of them must be
    // complete before any worker continues.
    pthread_barrier_wait(barrier);

    unsigned int offset[INPUT_SPACE];
    compute_offset(local_counters, thread_idx, thread_count, offset);
    sort_input(input, offset, first_idx, last_idx, radix, output);

    // Wait until every worker has finished scattering before the buffers
    // swap roles and the counters are overwritten by the next pass.
    pthread_barrier_wait(barrier);

    // The array just written becomes the source of the next pass. With
    // KEY_LENGTH = 7 (an odd number of passes) the last pass writes into the
    // array that was originally info->output.
    unsigned char **const temp = input;
    input = output;
    output = temp;
    --radix;
  } while(radix >= 0);
  return NULL;
}

// sort the radix index
inline void threaded_radix (unsigned char **input, unsigned char 
**output, const unsigned int nb_keys)
{
  const unsigned short nb_core = core_count();

  pthread_t threads[nb_core];
  thread_info threads_arg[nb_core];
  unsigned int local_counters[nb_core * INPUT_SPACE];

  pthread_barrier_t barrier;
  pthread_barrier_init(&barrier, NULL, nb_core);

  const unsigned int range = nb_keys / nb_core;
  unsigned int last_idx = 0;
  for (unsigned short i = 0; i < nb_core; ++i){
    const unsigned int first_idx = last_idx;
    last_idx = last_idx + range;

thread_info &info = threads_arg[i];
info.input = input;
info.output = output;
info.first_idx = first_idx;
info.last_idx = last_idx;
info.thread_idx = i;
info.thread_count = nb_core;
info.local_counters = local_counters;
info.barrier = &barrier;
  }
  threads_arg[nb_core-1].last_idx = nb_keys;

  for (unsigned short i = 1; i < nb_core; ++i){
    pthread_create(&threads[i], NULL, concurrent_radix, (void *)&
(threads_arg[i]));
  }
  concurrent_radix((void *)&(threads_arg[0]));

  for (unsigned short i = 1; i < nb_core; ++i){
    pthread_join(threads[i], NULL);
  }
  pthread_barrier_destroy(&barrier);
}

inline void radix_sort(unsigned char *input, unsigned char **output, 
const unsigned int nb_keys)
{
  unsigned char **buffer = (unsigned char **)malloc(nb_keys * 
sizeof(unsigned char*));
  map_strings(input, buffer, nb_keys);
  threaded_radix(buffer, output, nb_keys);

  free(buffer);
}

// Entry point: main(argc, argv) with argv[1] = input file, argv[2] = output
// file.
// The input file starts with a decimal key count, followed by that many
// CHARKEY_LENGTH-byte records. The keys are sorted with radix_sort, the
// elapsed time is printed to stdout, and the count followed by one key per
// line is written to the output file.
// Returns 0 on success, otherwise one of the `errors` codes.
int main(const int argc, const char *const argv[])
{
  if(argc < 3) {
    print_usage(argv[0]);
    return INVALID_USAGE;
  }
  // import the data in a table
  FILE *input = fopen(argv[1], "r");
  if(!input){
    const int errsv = errno;
    fprintf(stderr, "%s: %s\n", argv[1], strerror(errsv));
    print_usage(argv[0]);
    return ERROR_OPENING_INPUT;
  }

  unsigned int input_size;
  {
    // input_size is unsigned, so it is scanned with %u rather than %d.
    int converted = fscanf(input, "%u\n", &input_size);
    if(converted != 1){
      fprintf(stderr, "Invalid file format");
      fclose(input);
      return INVALID_FORMAT;
    }
  }

  // Multiply in size_t so a large key count cannot wrap in unsigned int.
  unsigned char *input_table =
      (unsigned char *)malloc((size_t)input_size * CHARKEY_LENGTH);
  unsigned char **output_table =
      (unsigned char **)malloc((size_t)input_size * sizeof(unsigned char*));
  if(!input_table || !output_table){
    fprintf(stderr, "Error: not enough memory\n");
    free(input_table);
    free(output_table);
    fclose(input);
    return MEMORY_ERROR;
  }

  for(unsigned int i = 0; i<input_size; ++i){
    unsigned char * key = &(input_table[coordinate(i)]);
    size_t size_read = fread(key, 1, CHARKEY_LENGTH, input);
    // The last byte of each record is replaced by the string terminator.
    key[KEY_LENGTH] = '\0';
    if(size_read != CHARKEY_LENGTH){
      if(feof(input)){
        // A short read is only tolerated on the very last key.
        assert(i == (input_size - 1));
      } else {
        fprintf(stderr, "Invalid file format");
        free(input_table);
        free(output_table);
        fclose(input);
        return INVALID_FORMAT;
      }
    }
  }
  fclose(input);

  // sort
  struct timeval tick1, tick2;
  gettimeofday(&tick1, NULL);

  radix_sort(input_table, output_table, input_size);

  gettimeofday(&tick2, NULL);
  const double ellapsed = (tick2.tv_sec + tick2.tv_usec/1000000.0) -
                          (tick1.tv_sec + tick1.tv_usec/1000000.0);
  printf("time for action = %g seconds\n", ellapsed);

  // write the output
  FILE *output = fopen(argv[2], "w+");
  // Check the stream that was just opened; the previous code tested `input`
  // here, so a failed fopen went unnoticed.
  if(!output){
    const int errsv = errno;
    fprintf(stderr, "%s: %s\n", argv[2], strerror(errsv));
    print_usage(argv[0]);
    free(input_table);
    free(output_table);
    return ERROR_OPENING_OUTPUT;
  }

  fprintf(output, "%u\n", input_size);
  for(unsigned int i = 0; i < input_size;++i){
    fprintf(output, "%s\n", (const char *)output_table[i]);
  }
  fclose(output);

  free(input_table);
  free(output_table);

  return 0;
}