C++；试图提高pthread程序的性能我需要帮助提高我的多线程程序在C++中的速度。 std::vector<double> solve_progon(std::vector<std::vector<double_C++_Multithreading_Pthreads

C++；试图提高pthread程序的性能我需要帮助提高我的多线程程序在C++中的速度。 std::vector<double> solve_progon(std::vector<std::vector<double

c++ multithreading

C++；试图提高pthread程序的性能我需要帮助提高我的多线程程序在C++中的速度。 std::vector<double> solve_progon(std::vector<std::vector<double,c++,multithreading,pthreads,C++,Multithreading,Pthreads,C++；试图提高pthread程序的性能我需要帮助提高我的多线程程序在C++中的速度。 std::vector<double> solve_progon(std::vector<std::vector<double> > A, std::vector <double> B) { // solving } std::vector<double> solve(std::vector<double> lef

C++：试图提高 pthread 程序的性能。我需要帮助来提高我的 C++ 多线程程序的速度。

// Takes a matrix A (vector of rows) and a vector B and returns a vector<double>.
// The implementation is omitted in this listing.
// NOTE(review): from the name and the (A, B) signature this presumably solves
// the linear system A * x = B with a sweep ("progonka") method — confirm
// against the real body.
// NOTE(review): A and B are passed by value, so every call copies the whole
// matrix and the right-hand side; if the body does not modify them, const
// references would avoid those copies.
std::vector<double> solve_progon(std::vector<std::vector<double> > A, std::vector <double> B) {
  // solving (body omitted in this listing)
}

// Produces one new vector<double> from three input vectors and an index j.
// The implementation is omitted in this listing.
// calc() calls it as solve(phi[i - 1], phi[i], phi[i + 1], i - start_index),
// i.e. with a column, its two neighbours, and the column's offset inside the
// calling thread's slice.
// NOTE(review): left, mid and right are passed by value, so each call copies
// three vectors; if the body only reads them, const references would avoid
// those copies.
std::vector<double> solve(std::vector<double> left, std::vector<double> mid, std::vector<double> right, int j) {
    //solving (body omitted in this listing)
}

// Thread entry point: repeatedly recomputes this thread's slice of the global
// `phi` until the simulated time reaches T, advancing by dt per iteration.
//
// Parameters:
//   thread - the zero-based thread index, passed by value inside the void*
//            (main() passes `(void *)i` to pthread_create).
// Returns:
//   NULL; main() joins without reading the result.
//
// The slice is [start_index, end_index] (inclusive). Every thread gets
// X_SIZE / THREADS indices; the last thread additionally takes the remainder.
//
// Each iteration has two phases separated by barriers:
//   1. build the new values for the slice from the current `phi`
//      (this reads phi[i - 1] and phi[i + 1], which may belong to other threads);
//   2. after all threads passed `bar` (nobody is reading any more), publish
//      the new values into `phi`, then wait on `syn` so that no thread starts
//      the next iteration while another is still writing.
void * calc(void *thread) {
    long t = (long) thread;
    int start_index = t * (X_SIZE / THREADS);
    int end_index = (t != THREADS - 1)?(t + 1) * (X_SIZE / THREADS) - 1: X_SIZE - 1;

    // Scratch storage for the slice's new values. Reserved once so the outer
    // vector never reallocates inside the time loop (clear() keeps capacity).
    std::vector<std::vector<double> > next;
    next.reserve(end_index - start_index + 1);

    double cur_time = 0;
    while (cur_time < T) {
        // NOTE(review): for i == 0 this reads phi[-1] and for i == X_SIZE - 1
        // it reads phi[X_SIZE]; that is only valid if `phi` is laid out with
        // extra boundary entries — confirm against the definition of `phi`.
        for (int i = start_index; i <= end_index; i++) {
            next.push_back(solve(phi[i - 1], phi[i], phi[i + 1], i - start_index));
        }
        cur_time += dt;

        // Everyone must finish reading the old phi before anyone overwrites it.
        pthread_barrier_wait(&bar);

        // Swap instead of copy-assign: phi[i] takes over the freshly computed
        // buffer and the old one is released by next.clear() below.
        for (int i = start_index; i <= end_index; i++) {
            phi[i].swap(next[i - start_index]);
        }
        next.clear();

        // Everyone must finish writing before the next iteration reads phi.
        pthread_barrier_wait(&syn);
    }

    // Returning from the start routine ends the thread just like
    // pthread_exit(NULL) would, but lets `next` be destroyed by normal scope exit.
    return NULL;
}

// Program entry point: initialises the two barriers for THREADS participants,
// starts THREADS worker threads running calc(), waits for all of them and
// prints the elapsed time (clock_time() difference divided by 1e9).
//
// Returns 0 on success, 1 if a worker thread could not be created.
int main(int argc, char **argv) {
    //Some init
    pthread_barrier_init(&bar, NULL, THREADS);
    pthread_barrier_init(&syn, NULL, THREADS);

    // Owned by a vector so the handles are released on every exit path
    // (the original new[] array was never deleted).
    std::vector<pthread_t> threads(THREADS);

    unsigned long long start = clock_time();
    for (long i = 0; i < THREADS; i++) {
        if (pthread_create(&threads[i], NULL, calc, (void *)i) != 0) {
            std::cout << "Can't create thread " << i << std::endl;
            // Both barriers expect exactly THREADS participants, so with one
            // worker missing the others would block forever, and joining an
            // uninitialised handle is undefined. Give up instead of hanging;
            // leaving main() terminates the already started workers.
            return 1;
        }
    }
    for (int i = 0; i < THREADS; i++) {
        pthread_join(threads[i], NULL);
    }
    std::cout << "It takes " << (double)(clock_time() - start) / 1e9 << std::endl;

    // All workers have finished, so nobody can still be waiting on the barriers.
    pthread_barrier_destroy(&bar);
    pthread_barrier_destroy(&syn);

    return 0;
}

std:：vector solve\u progon（std:：vector A，std:：vector B）{
//解决
}
std:：vector solve（std:：vector left，std:：vector mid，std:：vector right，int j）{
//解决
}
空心*计算（空心*螺纹）{
长t=（长）螺纹；
int start_index=t*（X_大小/线程）；
int end_index=（t！=线程-1）？（t+1）*（X_大小/线程）-1:X_大小-1；
std：：向量本地，下一个；
std：：向量零；
对于（int i=0；i对于（int i=start_index；i
首先，停止复制向量，改为通过 const 引用传递它们。然后，启用编译器优化。这些屏障（barrier）是做什么用的？您有多核机器吗？如果没有，创建线程来执行这项工作实际上不会并行运行，只会增加线程切换的开销。如果此代码可以正常工作，那么它在这里属于离题（off-topic）内容，您可能想把它发布到 Code Review 上。这个问题似乎离题了，因为它是关于性能的，而不是关于 bug 修复的。