C++ QtConcurrent::map没有任何好处

C++ QtConcurrent::map没有任何好处,c++,multithreading,qt,qtconcurrent,C++,Multithreading,Qt,Qtconcurrent,我想使用QtConcurrent::map函数操作QVector。我的示例程序所做的就是将QVector中的所有值增加1 QVector<double> arr(10000000, 0); QElapsedTimer timer; qDebug() << QThreadPool::globalInstance()->maxThreadCount() << "Threads"; int end; /* * * * * * * * * * * * * *

我想使用
QtConcurrent::map
函数操作
QVector
。我的示例程序所做的就是将
QVector
中的所有值增加1

// Benchmark data: 10 million doubles, all initialized to zero.
QVector<double> arr(10000000, 0);
QElapsedTimer timer;
// Report how many worker threads the global pool (and thus QtConcurrent) will use.
qDebug() << QThreadPool::globalInstance()->maxThreadCount() << "Threads";

int end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Baseline 1: single-threaded std::transform — reads each value, adds 1, writes it back.
timer.start();
for(int i = 0; i < 100; ++i) {
    std::transform(arr.begin(), arr.end(), arr.begin(), [](double x){ return ++x; });
}
end = timer.elapsed();
qDebug() << end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Baseline 2: single-threaded std::for_each — increments each element in place.
timer.start();
for(int i = 0; i < 100; ++i) {
    std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
}
end = timer.elapsed();
qDebug() << end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Candidate: QtConcurrent::map distributes the range over the global thread
// pool; waitForFinished() blocks each iteration until every chunk completes.
timer.start();
for(int i = 0; i < 100; ++i) {
    QFuture<void> qf = QtConcurrent::map(arr.begin(), arr.end(), [](double &x){ ++x; });
    qf.waitForFinished();
}
end = timer.elapsed();
qDebug() << end;
因此,多线程版本几乎没有速度优势。我验证了实际上有4个线程正在运行。我使用-O2优化。更常见的
QThreadPool
方法是否更适合这种情况

编辑:

我使用
QtConcurrent::run()
尝试了不同的方法。以下是程序代码的相关部分:

// Adds 1.0 to every element in the half-open iterator range [first, last).
void add1(QVector<double>::iterator first, QVector<double>::iterator last) {
    while (first != last) {
        *first += 1;
        ++first;
    }
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Split the vector into numThreads contiguous chunks and increment each
// chunk on its own QtConcurrent::run task.
QFuture<void> qf[numThreads];
for(int j = 0; j < numThreads; ++j) {
    // Fix: the chunk end must be one-past-the-last element. The original
    // passed arr.begin()+(j+1)*n/numThreads-1, which left the last element
    // of every chunk un-incremented (half-open ranges need no "-1").
    qf[j] = QtConcurrent::run(add1, arr.begin()+j*n/numThreads, arr.begin()+(j+1)*n/numThreads);
}
// Wait for every chunk to finish before reading the timer.
for(int j = 0; j < numThreads; ++j) {
    qf[j].waitForFinished();
}

或者,与简单地使用 C++ 线程原语并自己实现一个线程池相比,QtConcurrent 的开销是否很高?
// A minimal thread-safe FIFO work queue: producers push_back, consumers
// block in pop_front until work arrives or terminate() is called.
template<class T>
struct threaded_queue {
  using lock = std::unique_lock<std::mutex>;
  // Enqueue an element and wake one waiting consumer.
  void push_back( T t ) {
    {
      lock l(m);
      data.push_back(std::move(t));
    }
    cv.notify_one();
  }
  // Block until an element is available or the queue has been terminated.
  // Returns an empty optional after terminate() — consumers use that as
  // their shutdown signal.
  boost::optional<T> pop_front() {
    lock l(m);
    cv.wait(l, [this]{ return abort || !data.empty(); } );
    if (abort) return {};
    // Fix: the original took data.back()/pop_back(), which made the queue
    // LIFO despite its push_back/pop_front naming (tasks could starve).
    // Take from the front so ordering is genuinely first-in, first-out.
    auto r = std::move(data.front());
    data.pop_front();
    return std::move(r);
  }
  // Wake all waiters, discard pending work, and make every future pop
  // return an empty optional.
  void terminate() {
    {
      lock l(m);
      abort = true;
      data.clear();
    }
    cv.notify_all();
  }
  ~threaded_queue()
  {
    terminate();
  }
private:
  std::mutex m;                 // guards data and abort
  std::deque<T> data;           // pending elements, FIFO order
  std::condition_variable cv;   // signaled on push and on terminate
  bool abort = false;           // set once by terminate(); never cleared
};
// A tiny grow-on-demand thread pool. Workers are std::async futures that
// loop over a shared threaded_queue of type-erased packaged_tasks.
struct thread_pool {
  // Start with n worker threads (default 1).
  thread_pool( std::size_t n = 1 ) { start_thread(n); }
  thread_pool( thread_pool&& ) = delete;
  thread_pool& operator=( thread_pool&& ) = delete;
  ~thread_pool() = default; // or `{ terminate(); }` if you want to abandon some tasks
  // Enqueue a task for execution; returns a future for its result.
  // R is the result of invoking F with no arguments.
  template<class F, class R=std::result_of_t<F&()>>
  std::future<R> queue_task( F task ) {
    std::packaged_task<R()> p(std::move(task));
    auto r = p.get_future();
    tasks.push_back( std::move(p) );
    return r;
  }
  // Like queue_task, but spawns one extra worker when all current workers
  // appear busy. NOTE(review): `active` is incremented only after a task is
  // popped, so this count can lag briefly and over- or under-spawn.
  template<class F, class R=std::result_of_t<F&()>>
  std::future<R> run_task( F task ) {
    if (threads_active() >= total_threads()) {
      start_thread();
    }
    return queue_task( std::move(task) );
  }
  // Ask all workers to stop; unstarted tasks are discarded by the queue.
  void terminate() {
    tasks.terminate();
  }
  // Number of workers currently running a task (approximate; see run_task).
  std::size_t threads_active() const {
    return active;
  }
  // Total number of workers ever started.
  std::size_t total_threads() const {
    return threads.size();
  }
  // Stop the queue, then join every worker: the std::async future
  // destructors block until their threads return.
  void clear_threads() {
    terminate();
    threads.clear();
  }
  // Launch n additional workers. Each worker pops tasks until pop_front
  // returns an empty optional (i.e. the queue was terminated).
  void start_thread( std::size_t n = 1 ) {
    while(n-->0) {
      threads.push_back(
        std::async( std::launch::async,
          [this]{
            while(auto task = tasks.pop_front()) {
              ++active;
              try{
                // packaged_task stores any exception in its future, so this
                // call itself does not propagate task exceptions.
                (*task)();
              } catch(...) {
                --active;
                throw;
              }
              --active;
            }
          }
        )
      );
    }
  }
private:
  std::vector<std::future<void>> threads;           // worker handles; dtors join
  threaded_queue<std::packaged_task<void()>> tasks; // pending, type-erased work
  std::atomic<std::size_t> active = {};             // busy-worker counter
};

// Simple stopwatch over std::chrono::high_resolution_clock:
// call start(), then finish(), then read ms().
struct my_timer_t {
    std::chrono::high_resolution_clock::time_point first;
    std::chrono::high_resolution_clock::duration duration;

    // Record the starting instant.
    void start() {
        first = std::chrono::high_resolution_clock::now();
    }
    // Stop the clock; stores and also returns the elapsed duration.
    std::chrono::high_resolution_clock::duration finish() {
        duration = std::chrono::high_resolution_clock::now() - first;
        return duration;
    }
    // Elapsed time of the last start()/finish() pair, in whole milliseconds.
    unsigned long long ms() const {
        using std::chrono::duration_cast;
        using std::chrono::milliseconds;
        return duration_cast<milliseconds>(duration).count();
    }
};
// Benchmark: 100 passes of "+1 over 1M doubles" via std::transform,
// std::for_each, and an 8-way split over the hand-rolled thread_pool.
int main() {
    std::vector<double> arr(1000000, 0);
    my_timer_t timer;

    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Baseline 1: std::transform (copy element, increment, write back).
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::transform(arr.begin(), arr.end(), arr.begin(), [](double x){ return ++x; });
    }
    timer.finish();
    auto time_transform = timer.ms();
    // arr[rand()%...] is printed so the compiler cannot optimize the loops away.
    std::cout << time_transform << "<- std::transform (" << arr[rand()%arr.size()] << ")\n";
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Baseline 2: std::for_each with in-place increment.
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
    }
    timer.finish();
    auto time_for_each = timer.ms();
    std::cout << time_for_each << "<- std::for_each (" << arr[rand()%arr.size()] << ")\n";
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Candidate: split the array into num_threads equal chunks, one pool task each.
    // NOTE(review): relies on arr.size() being divisible by num_threads
    // (1000000 / 8 = 125000); otherwise the tail elements would be skipped.
    enum { num_threads = 8 };
    thread_pool pool(num_threads);
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::array< std::future<void>, num_threads > tasks;
        for (int t = 0; t < num_threads; ++t) {
            tasks[t] = pool.run_task([&,t]{
                std::for_each( arr.begin()+(arr.size()/num_threads)*t, arr.begin()+(arr.size()/num_threads)*(t+1), [](double& x){++x;} );
            });
        }
        // std::cout << "loop! -- " << pool.threads_active() << "/" << pool.total_threads() << std::endl;
        for (int t = 0; t < num_threads; ++t)
            tasks[t].wait();
    }
    timer.finish();
    auto time_pool = timer.ms();
    std::cout << time_pool << "<- thread_pool (" << arr[rand()%arr.size()] << ")\n";
}
模板
结构线程队列{
使用lock=std::unique_lock;
无效推回(T){
{
锁l(m);
数据。推回(std::move(t));
}
cv.通知_one();
}
boost::可选的pop_front(){
锁l(m);
wait(l,[this]{return abort | | |!data.empty();});
if(abort)返回{};
auto r=std::move(data.back());
data.pop_back();
返回std::move(r);
}
无效终止(){
{
锁l(m);
中止=真;
data.clear();
}
cv.通知所有人();
}
~u队列()
{
终止();
}
私人:
std::互斥m;
std::deque数据;
std::条件变量cv;
bool abort=false;
};
结构线程池{
线程池(std::size_t n=1){start_线程(n);}
线程池(线程池&&)=删除;
线程池&运算符=(线程池&&)=删除;
~thread_pool()=default;//或`{terminate();}`如果要放弃某些任务
模板
std::未来队列任务(F任务){
std::打包任务p(std::移动(任务));
自动r=p。获取未来();
任务。推回(std::move(p));
返回r;
}
模板
std::未来运行任务(F任务){
如果(线程数\活动线程数()>=总线程数()){
启动_线程();
}
返回队列_任务(std::move(task));
}
无效终止(){
tasks.terminate();
}
std::size\u t threads\u active()常量{
主动返回;
}
std::size\u t总线程数()常量{
返回线程。size();
}
无效清除_线程(){
终止();
线程。清除();
}
无效起始螺纹(标准::尺寸n=1){
而(n-->0){
把你推回去(
std::async(std::launch::async,
[本]{
while(auto task=tasks.pop_front()){
++活跃的;
试一试{
(*任务)();
}捕获(…){
--活跃的;
投掷;
}
--活跃的;
}
}
)
);
}
}
私人:
向量线程;
线程队列任务;
std::原子活动={};
};
构造我的计时器{
std::chrono::高分辨率时钟::时间点优先;
标准::时钟::高分辨率时钟::持续时间;
void start(){
first=std::chrono::高分辨率时钟::now();
}
标准::时钟::高分辨率时钟::持续时间完成(){
返回持续时间=std::chrono::高分辨率时钟::now()-第一;
}
无符号长ms()常量{
返回std::chrono::duration_cast(duration).count();
}
};
int main(){
std::向量arr(1000000,0);
我的定时器;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
timer.start();
对于(int i=0;i<100;++i){
std::transform(arr.begin(),arr.end(),arr.begin(),[](双x){return++x;});
}
timer.finish();
自动时间转换=timer.ms();

你为什么期望加速?你在每个循环迭代中都在等待未来。我不是这个领域的专家,但我期望map()启动4个线程,这将使此代码行比STL函数完成得更快。或者我误解了此函数的概念吗?您如何测试
QtConcurrent
的开销?请注意,您将
++
操作分组为
num_Threads
批。您也可以使用
QtConcurrent
执行此操作。@m7913d这就是
QtConcurrent
应该做的事情;根据硬件线程的数量启动一些子线程来处理部分任务。我只是手动完成了。我在
上得到了显著的加速,每个
QtConcurrent
都将每个操作分配给一个线程(考虑最大并发线程数)。它不会对它们进行分组。请注意,如果各操作的耗时可能不同,则无法直接正确地对操作进行分组。@m7913d 哦,这太糟糕了。我错了,Qt 会根据用户执行时间和调度开销做一定的分组:它从每批一个操作开始,每当开销相对于用户执行时间过大时,就把批量加倍。因此,在达到较大的批量之前需要多次尝试,从而产生大量开销。
181 ms // std::for_each
163 ms // QtConcurrent::run
// A minimal thread-safe FIFO work queue: producers push_back, consumers
// block in pop_front until work arrives or terminate() is called.
template<class T>
struct threaded_queue {
  using lock = std::unique_lock<std::mutex>;
  // Enqueue an element and wake one waiting consumer.
  void push_back( T t ) {
    {
      lock l(m);
      data.push_back(std::move(t));
    }
    cv.notify_one();
  }
  // Block until an element is available or the queue has been terminated.
  // Returns an empty optional after terminate() — consumers use that as
  // their shutdown signal.
  boost::optional<T> pop_front() {
    lock l(m);
    cv.wait(l, [this]{ return abort || !data.empty(); } );
    if (abort) return {};
    // Fix: the original took data.back()/pop_back(), which made the queue
    // LIFO despite its push_back/pop_front naming (tasks could starve).
    // Take from the front so ordering is genuinely first-in, first-out.
    auto r = std::move(data.front());
    data.pop_front();
    return std::move(r);
  }
  // Wake all waiters, discard pending work, and make every future pop
  // return an empty optional.
  void terminate() {
    {
      lock l(m);
      abort = true;
      data.clear();
    }
    cv.notify_all();
  }
  ~threaded_queue()
  {
    terminate();
  }
private:
  std::mutex m;                 // guards data and abort
  std::deque<T> data;           // pending elements, FIFO order
  std::condition_variable cv;   // signaled on push and on terminate
  bool abort = false;           // set once by terminate(); never cleared
};
// A tiny grow-on-demand thread pool. Workers are std::async futures that
// loop over a shared threaded_queue of type-erased packaged_tasks.
struct thread_pool {
  // Start with n worker threads (default 1).
  thread_pool( std::size_t n = 1 ) { start_thread(n); }
  thread_pool( thread_pool&& ) = delete;
  thread_pool& operator=( thread_pool&& ) = delete;
  ~thread_pool() = default; // or `{ terminate(); }` if you want to abandon some tasks
  // Enqueue a task for execution; returns a future for its result.
  // R is the result of invoking F with no arguments.
  template<class F, class R=std::result_of_t<F&()>>
  std::future<R> queue_task( F task ) {
    std::packaged_task<R()> p(std::move(task));
    auto r = p.get_future();
    tasks.push_back( std::move(p) );
    return r;
  }
  // Like queue_task, but spawns one extra worker when all current workers
  // appear busy. NOTE(review): `active` is incremented only after a task is
  // popped, so this count can lag briefly and over- or under-spawn.
  template<class F, class R=std::result_of_t<F&()>>
  std::future<R> run_task( F task ) {
    if (threads_active() >= total_threads()) {
      start_thread();
    }
    return queue_task( std::move(task) );
  }
  // Ask all workers to stop; unstarted tasks are discarded by the queue.
  void terminate() {
    tasks.terminate();
  }
  // Number of workers currently running a task (approximate; see run_task).
  std::size_t threads_active() const {
    return active;
  }
  // Total number of workers ever started.
  std::size_t total_threads() const {
    return threads.size();
  }
  // Stop the queue, then join every worker: the std::async future
  // destructors block until their threads return.
  void clear_threads() {
    terminate();
    threads.clear();
  }
  // Launch n additional workers. Each worker pops tasks until pop_front
  // returns an empty optional (i.e. the queue was terminated).
  void start_thread( std::size_t n = 1 ) {
    while(n-->0) {
      threads.push_back(
        std::async( std::launch::async,
          [this]{
            while(auto task = tasks.pop_front()) {
              ++active;
              try{
                // packaged_task stores any exception in its future, so this
                // call itself does not propagate task exceptions.
                (*task)();
              } catch(...) {
                --active;
                throw;
              }
              --active;
            }
          }
        )
      );
    }
  }
private:
  std::vector<std::future<void>> threads;           // worker handles; dtors join
  threaded_queue<std::packaged_task<void()>> tasks; // pending, type-erased work
  std::atomic<std::size_t> active = {};             // busy-worker counter
};

// Simple stopwatch over std::chrono::high_resolution_clock:
// call start(), then finish(), then read ms().
struct my_timer_t {
    std::chrono::high_resolution_clock::time_point first;
    std::chrono::high_resolution_clock::duration duration;

    // Record the starting instant.
    void start() {
        first = std::chrono::high_resolution_clock::now();
    }
    // Stop the clock; stores and also returns the elapsed duration.
    std::chrono::high_resolution_clock::duration finish() {
        duration = std::chrono::high_resolution_clock::now() - first;
        return duration;
    }
    // Elapsed time of the last start()/finish() pair, in whole milliseconds.
    unsigned long long ms() const {
        using std::chrono::duration_cast;
        using std::chrono::milliseconds;
        return duration_cast<milliseconds>(duration).count();
    }
};
// Benchmark: 100 passes of "+1 over 1M doubles" via std::transform,
// std::for_each, and an 8-way split over the hand-rolled thread_pool.
int main() {
    std::vector<double> arr(1000000, 0);
    my_timer_t timer;

    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Baseline 1: std::transform (copy element, increment, write back).
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::transform(arr.begin(), arr.end(), arr.begin(), [](double x){ return ++x; });
    }
    timer.finish();
    auto time_transform = timer.ms();
    // arr[rand()%...] is printed so the compiler cannot optimize the loops away.
    std::cout << time_transform << "<- std::transform (" << arr[rand()%arr.size()] << ")\n";
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Baseline 2: std::for_each with in-place increment.
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
    }
    timer.finish();
    auto time_for_each = timer.ms();
    std::cout << time_for_each << "<- std::for_each (" << arr[rand()%arr.size()] << ")\n";
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Candidate: split the array into num_threads equal chunks, one pool task each.
    // NOTE(review): relies on arr.size() being divisible by num_threads
    // (1000000 / 8 = 125000); otherwise the tail elements would be skipped.
    enum { num_threads = 8 };
    thread_pool pool(num_threads);
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::array< std::future<void>, num_threads > tasks;
        for (int t = 0; t < num_threads; ++t) {
            tasks[t] = pool.run_task([&,t]{
                std::for_each( arr.begin()+(arr.size()/num_threads)*t, arr.begin()+(arr.size()/num_threads)*(t+1), [](double& x){++x;} );
            });
        }
        // std::cout << "loop! -- " << pool.threads_active() << "/" << pool.total_threads() << std::endl;
        for (int t = 0; t < num_threads; ++t)
            tasks[t].wait();
    }
    timer.finish();
    auto time_pool = timer.ms();
    std::cout << time_pool << "<- thread_pool (" << arr[rand()%arr.size()] << ")\n";
}
153<- std::transform (100)
131<- std::for_each (200)
82<- thread_pool (300)