Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/162.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ windows上的pthread和多核_C++_Multithreading_Pthreads - Fatal编程技术网

C++ windows上的pthread和多核

C++ windows上的pthread和多核,c++,multithreading,pthreads,C++,Multithreading,Pthreads,我的问题涉及到pthread库和使用多核系统。在适当的参数和较小的数据量下,系统似乎有所改进,最大的改进是65000左右。数据表明,当您增加线程时,它开始减少所需的时间,但随后不久就会增加。当线程数=1,2,4时,它可能缓慢增加,有时为8,但16时,时间又开始减少。在大数据量的情况下,没有任何改进,并且始终保持相当接近。 如果有人能告诉我是不是有什么事情迫使我的线程按顺序运行,或者是另一个问题,那就太棒了 这是数据 1395525080 0 num thread: 1 data size: 10

我的问题与pthread库在多核系统上的使用有关。在参数合适、数据量较小的情况下,程序似乎有所提速,其中数据量在65000左右时提升最为明显。数据显示,随着线程数的增加,所需时间先是减少,但不久之后又开始增加:当线程数为1、2、4(有时也包括8)时,耗时可能缓慢上升,而到16个线程时,耗时又开始下降。在数据量很大的情况下则没有任何改善,不同线程数下的耗时始终非常接近。 如果有人能告诉我,是不是有什么东西迫使我的线程按顺序执行,还是另有其他问题,那就太好了

这是数据

1395525080 0 num thread: 1 data size: 1024 0 1395525080
1395525080 0 num thread: 2 data size: 1024 0 1395525080
1395525080 0 num thread: 4 data size: 1024 0 1395525080
1395525080 15 num thread: 8 data size: 1024 0 1395525080
1395525080 47 num thread: 16 data size: 1024 0 1395525080
1395525080 31 num thread: 32 data size: 1024 0 1395525080
1395525080 16 num thread: 1 data size: 4096 0 1395525080
1395525080 0 num thread: 2 data size: 4096 0 1395525080
1395525080 0 num thread: 4 data size: 4096 0 1395525080
1395525080 15 num thread: 8 data size: 4096 0 1395525080
1395525080 78 num thread: 16 data size: 4096 0 1395525080
1395525080 31 num thread: 32 data size: 4096 0 1395525080
1395525080 140 num thread: 1 data size: 65536 0 1395525080
1395525081 156 num thread: 2 data size: 65536 0 1395525081
1395525081 109 num thread: 4 data size: 65536 0 1395525081
1395525081 94 num thread: 8 data size: 65536 0 1395525081
1395525081 93 num thread: 16 data size: 65536 0 1395525081
1395525081 187 num thread: 32 data size: 65536 0 1395525082
1395525082 171 num thread: 1 data size: 75536 0 1395525082
1395525082 172 num thread: 2 data size: 75536 0 1395525082
1395525082 141 num thread: 4 data size: 75536 0 1395525083
1395525083 109 num thread: 8 data size: 75536 0 1395525083
1395525083 140 num thread: 16 data size: 75536 0 1395525083
1395525083 234 num thread: 32 data size: 75536 0 1395525084
1395525084 203 num thread: 1 data size: 85536 0 1395525084
1395525084 203 num thread: 2 data size: 85536 0 1395525084
1395525084 172 num thread: 4 data size: 85536 0 1395525085
1395525085 202 num thread: 8 data size: 85536 0 1395525085
1395525085 125 num thread: 16 data size: 85536 0 1395525085
1395525085 187 num thread: 32 data size: 85536 0 1395525086
1395525086 125 num thread: 1 data size: 55536 0 1395525086
1395525086 109 num thread: 2 data size: 55536 0 1395525086
1395525086 141 num thread: 4 data size: 55536 0 1395525086
1395525086 78 num thread: 8 data size: 55536 0 1395525086
1395525087 140 num thread: 16 data size: 55536 0 1395525087
1395525087 156 num thread: 32 data size: 55536 0 1395525087
1395525120 153271 num thread: 1 data size: 70000000 153 1395525274
1395525398 152630 num thread: 2 data size: 70000000 152 1395525551
1395525675 154846 num thread: 4 data size: 70000000 154 1395525830
1395525956 153988 num thread: 8 data size: 70000000 153 1395526110
1395526236 153956 num thread: 16 data size: 70000000 153 1395526390
1395526515 157935 num thread: 32 data size: 70000000 157 1395526673
这是代码,它实现了一个传统的桶排序。我还有另外两个类似的程序,使用类似的数据,同样做桶排序;顺序(单线程)版本的代码得到的结果几乎完全相同

/// One bucket of the bucket sort: the values that fall into a single value
/// range. Each bucket is handed to its own worker thread to be sorted.
struct bucket
{
    std::vector<int> data;  ///< Values assigned to this bucket.
};


// pthread start routine (defined further down): receives a bucket through the
// void* argument and sorts that bucket's data in place.
void *sort_bucket(void *unsorted_bucket);
int _tmain(int argc, _TCHAR* argv[])
{
    int array_N[] = {1024, 4096, 65536,75536,85536,55536, 70000000, 16777216};
    int array_number_of_threads[] = {1, 2, 4, 8, 16, 32};
    std::vector<int> N;
    std::vector<int> number_of_threads;
    number_of_threads.assign(array_number_of_threads, array_number_of_threads+6);
    N.assign(array_N, array_N+7);

    for(int size_index = 0; size_index < N.size(); size_index++)
    {
        for(int thread_index = 0; thread_index < number_of_threads.size(); thread_index++)
        {
            std::vector<int> unsorted_data;
            std::vector<int> sorted_data;
            std::vector<std::thread> thread_array;
            std::vector<bucket> buckets;

            std::vector<pthread_t> thread;

            while(buckets.size() < number_of_threads[thread_index]){ // checks against the number of threads and creates the number of buckets
                bucket new_bucket;
                pthread_t new_thread;
                buckets.push_back(new_bucket);
                thread.push_back(new_thread);
            }

            for(int index = 0; index < N[size_index]; index++) // gathers the data
            {
                unsorted_data.push_back(rand() % N[size_index]);
            }

            clock_t t = 0;
            t = clock();
            time_t start = 0;
            time_t end = 0;

            time(&start);
            std::cout << start << " ";

            int difference = N[size_index]/number_of_threads[thread_index];
            int placeholder = 0;
            for(int index = 0; index < N[size_index]; index++) {//calculates which bucket the data belong in and places the data in that bucket
                //std::cout << unsorted_data[index] << " " << difference << " ";
                placeholder = unsorted_data[index]/difference;
                //std::cout << placeholder << std::endl;
                buckets[placeholder].data.push_back(unsorted_data[index]);
            }
            for(int index = 0; index < number_of_threads[thread_index]; index++){ // sends the data to the threads
                //thread_array.push_back(std::thread(sort_bucket ,buckets[index]));
                pthread_create(&thread[index],
                               NULL,
                               sort_bucket ,
                               (void*) &buckets[index].data);
            }
            // bring the data back to the root process
            for(int index = 0; index < number_of_threads[thread_index]; index++)        {
                void *data;
                struct bucket *ret_bucket;
                pthread_join(thread[index],(void**) &data);
                ret_bucket = (struct bucket *) data;
                sorted_data.insert(sorted_data.end(), ret_bucket->data.begin(), ret_bucket->data.end());
                //sorted_data.assign(ret_bucket->data.begin(), ret_bucket->data.end());
            }
            /*
             for(int index = 0; index < sorted_data.size(); index++)
             {
             std::cout << sorted_data[index] << " ";
             }
             */

            t = clock() - t;
            std::cout << t << " ";
            t = t/CLOCKS_PER_SEC;
            std::cout << "num thread: " << number_of_threads[thread_index] << " ";
            std::cout << "data size: " << N[size_index] << " ";
            std::cout << t << " ";
            time(&end);
            std::cout << end << std::endl;



            sort(unsorted_data.begin(), unsorted_data.end());

            for(int index = 0; index < unsorted_data.size(); index++)
            {
                if(unsorted_data[index] != sorted_data[index])
                {
                    std::cout << "data sorting failed" << std::endl;
                }
            }
        }
    }
    int placeholder;
    std::cin >> placeholder;
    return 0;
}

void *sort_bucket(void *unsorted_bucket)
{  
    bucket *temp_sorted_bucket = (struct bucket *) unsorted_bucket;  
    std::sort(temp_sorted_bucket->data.begin(), temp_sorted_bucket->data.end()); 

    /*for(int index = 0; index < temp_sorted_bucket->data.size(); index++)
     {
     std::cout << temp_sorted_bucket->data.at(index) << " ";
     }*/
    pthread_exit(temp_sorted_bucket);
    return 0; 
}
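struct bucket
{
std::vector<int> data;
} ;
void *sort_bucket(void *unsorted_bucket);
int _tmain(int argc, _TCHAR* argv[])
{
int array_N[] = {1024, 4096, 65536,75536,85536,55536, 70000000, 16777216};
int array_number_of_threads[] = {1, 2, 4, 8, 16, 32};
std::vector<int> N;
std::vector<int> number_of_threads;
number_of_threads.assign(array_number_of_threads, array_number_of_threads+6);
N.assign(array_N, array_N+7);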
for(int size_index = 0; size_index < N.size(); size_index++) …… 请记住,您的线程数受CPU物理内核数量的限制。一旦超过这个限制,系统就必须花费资源在同一个内核上的多个线程之间来回切换,这需要时间。例如,i3处理器有2个物理内核,借助超线程技术可在CPU上提供4个虚拟内核,因此超过4个线程通常不会带来任何好处。

与CPU密集型程序或在多个设备上执行并行I/O的程序不同,这类程序主要受内存带宽的限制。由于内存总线只有一条,通常还共用一个外部缓存,多线程不会有多大帮助,在某些情况下还可能因为缓存冲突而变慢。 那么1核和4核之间就没有区别吗?我用的是i5,尤其是在数据量这么大的情况下,限制因素是什么? 除了可用处理器的数量之外,还必须考虑许多其他变量,包括原评论中提到的内存带宽限制。遗憾的是,并不总是有明确的答案,有时只能通过基准测试来确定什么是最优的。在本例中,4个线程似乎通常是最优的(因为i5只有4个内核),而更高的内存需求开始成为瓶颈,以至于内核/线程的数量变得无关紧要。