C++ QtConcurrent::map没有任何好处

C++ QtConcurrent::map没有任何好处,c++,multithreading,qt,qtconcurrent,C++,Multithreading,Qt,Qtconcurrent,我想使用QtConcurrent::map函数操作QVector。我的示例程序所做的就是将QVector中的所有值增加1 QVector<double> arr(10000000, 0); QElapsedTimer timer; qDebug() << QThreadPool::globalInstance()->maxThreadCount() << "Threads"; int end; /* * * * * * * * * * * * * *

我想使用
QtConcurrent::map
函数操作
QVector
。我的示例程序所做的就是将
QVector
中的所有值增加1

// Benchmark data: 10 million doubles, all initialized to zero.
QVector<double> arr(10000000, 0);
QElapsedTimer timer;
// Report how many worker threads the global pool (and thus QtConcurrent) will use.
qDebug() << QThreadPool::globalInstance()->maxThreadCount() << "Threads";

int end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Baseline 1: single-threaded std::transform — reads each value, adds 1, writes it back.
timer.start();
for(int i = 0; i < 100; ++i) {
    std::transform(arr.begin(), arr.end(), arr.begin(), [](double x){ return ++x; });
}
end = timer.elapsed();
qDebug() << end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Baseline 2: single-threaded std::for_each — increments each element in place.
timer.start();
for(int i = 0; i < 100; ++i) {
    std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
}
end = timer.elapsed();
qDebug() << end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Candidate: QtConcurrent::map distributes the range over the global thread
// pool; waitForFinished() blocks each iteration until every chunk completes.
timer.start();
for(int i = 0; i < 100; ++i) {
    QFuture<void> qf = QtConcurrent::map(arr.begin(), arr.end(), [](double &x){ ++x; });
    qf.waitForFinished();
}
end = timer.elapsed();
qDebug() << end;
因此,多线程版本几乎没有速度优势。我验证了实际上有4个线程正在运行。我使用-O2优化。更常见的
QThreadPool
方法是否更适合这种情况

编辑:

我使用
QtConcurrent::run()
尝试了不同的方法。以下是程序代码的相关部分:

// Adds 1.0 to every element in the half-open iterator range [first, last).
void add1(QVector<double>::iterator first, QVector<double>::iterator last) {
    while (first != last) {
        *first += 1;
        ++first;
    }
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Split the vector into numThreads contiguous chunks and increment each
// chunk on its own QtConcurrent::run task.
QFuture<void> qf[numThreads];
for(int j = 0; j < numThreads; ++j) {
    // Fix: the chunk end must be one-past-the-last element. The original
    // passed arr.begin()+(j+1)*n/numThreads-1, which left the last element
    // of every chunk un-incremented (half-open ranges need no "-1").
    qf[j] = QtConcurrent::run(add1, arr.begin()+j*n/numThreads, arr.begin()+(j+1)*n/numThreads);
}
// Wait for every chunk to finish before reading the timer.
for(int j = 0; j < numThreads; ++j) {
    qf[j].waitForFinished();
}

或者,与简单地使用 C++ 线程原语并自己实现一个线程池相比,QtConcurrent 的开销是否很高?
// A minimal thread-safe FIFO work queue: producers push_back, consumers
// block in pop_front until work arrives or terminate() is called.
template<class T>
struct threaded_queue {
  using lock = std::unique_lock<std::mutex>;
  // Enqueue an element and wake one waiting consumer.
  void push_back( T t ) {
    {
      lock l(m);
      data.push_back(std::move(t));
    }
    cv.notify_one();
  }
  // Block until an element is available or the queue has been terminated.
  // Returns an empty optional after terminate() — consumers use that as
  // their shutdown signal.
  boost::optional<T> pop_front() {
    lock l(m);
    cv.wait(l, [this]{ return abort || !data.empty(); } );
    if (abort) return {};
    // Fix: the original took data.back()/pop_back(), which made the queue
    // LIFO despite its push_back/pop_front naming (tasks could starve).
    // Take from the front so ordering is genuinely first-in, first-out.
    auto r = std::move(data.front());
    data.pop_front();
    return std::move(r);
  }
  // Wake all waiters, discard pending work, and make every future pop
  // return an empty optional.
  void terminate() {
    {
      lock l(m);
      abort = true;
      data.clear();
    }
    cv.notify_all();
  }
  ~threaded_queue()
  {
    terminate();
  }
private:
  std::mutex m;                 // guards data and abort
  std::deque<T> data;           // pending elements, FIFO order
  std::condition_variable cv;   // signaled on push and on terminate
  bool abort = false;           // set once by terminate(); never cleared
};
// A tiny grow-on-demand thread pool. Workers are std::async futures that
// loop over a shared threaded_queue of type-erased packaged_tasks.
struct thread_pool {
  // Start with n worker threads (default 1).
  thread_pool( std::size_t n = 1 ) { start_thread(n); }
  thread_pool( thread_pool&& ) = delete;
  thread_pool& operator=( thread_pool&& ) = delete;
  ~thread_pool() = default; // or `{ terminate(); }` if you want to abandon some tasks
  // Enqueue a task for execution; returns a future for its result.
  // R is the result of invoking F with no arguments.
  template<class F, class R=std::result_of_t<F&()>>
  std::future<R> queue_task( F task ) {
    std::packaged_task<R()> p(std::move(task));
    auto r = p.get_future();
    tasks.push_back( std::move(p) );
    return r;
  }
  // Like queue_task, but spawns one extra worker when all current workers
  // appear busy. NOTE(review): `active` is incremented only after a task is
  // popped, so this count can lag briefly and over- or under-spawn.
  template<class F, class R=std::result_of_t<F&()>>
  std::future<R> run_task( F task ) {
    if (threads_active() >= total_threads()) {
      start_thread();
    }
    return queue_task( std::move(task) );
  }
  // Ask all workers to stop; unstarted tasks are discarded by the queue.
  void terminate() {
    tasks.terminate();
  }
  // Number of workers currently running a task (approximate; see run_task).
  std::size_t threads_active() const {
    return active;
  }
  // Total number of workers ever started.
  std::size_t total_threads() const {
    return threads.size();
  }
  // Stop the queue, then join every worker: the std::async future
  // destructors block until their threads return.
  void clear_threads() {
    terminate();
    threads.clear();
  }
  // Launch n additional workers. Each worker pops tasks until pop_front
  // returns an empty optional (i.e. the queue was terminated).
  void start_thread( std::size_t n = 1 ) {
    while(n-->0) {
      threads.push_back(
        std::async( std::launch::async,
          [this]{
            while(auto task = tasks.pop_front()) {
              ++active;
              try{
                // packaged_task stores any exception in its future, so this
                // call itself does not propagate task exceptions.
                (*task)();
              } catch(...) {
                --active;
                throw;
              }
              --active;
            }
          }
        )
      );
    }
  }
private:
  std::vector<std::future<void>> threads;           // worker handles; dtors join
  threaded_queue<std::packaged_task<void()>> tasks; // pending, type-erased work
  std::atomic<std::size_t> active = {};             // busy-worker counter
};

// Simple stopwatch over std::chrono::high_resolution_clock:
// call start(), then finish(), then read ms().
struct my_timer_t {
    std::chrono::high_resolution_clock::time_point first;
    std::chrono::high_resolution_clock::duration duration;

    // Record the starting instant.
    void start() {
        first = std::chrono::high_resolution_clock::now();
    }
    // Stop the clock; stores and also returns the elapsed duration.
    std::chrono::high_resolution_clock::duration finish() {
        duration = std::chrono::high_resolution_clock::now() - first;
        return duration;
    }
    // Elapsed time of the last start()/finish() pair, in whole milliseconds.
    unsigned long long ms() const {
        using std::chrono::duration_cast;
        using std::chrono::milliseconds;
        return duration_cast<milliseconds>(duration).count();
    }
};
// Benchmark: 100 passes of "+1 over 1M doubles" via std::transform,
// std::for_each, and an 8-way split over the hand-rolled thread_pool.
int main() {
    std::vector<double> arr(1000000, 0);
    my_timer_t timer;

    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Baseline 1: std::transform (copy element, increment, write back).
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::transform(arr.begin(), arr.end(), arr.begin(), [](double x){ return ++x; });
    }
    timer.finish();
    auto time_transform = timer.ms();
    // arr[rand()%...] is printed so the compiler cannot optimize the loops away.
    std::cout << time_transform << "<- std::transform (" << arr[rand()%arr.size()] << ")\n";
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Baseline 2: std::for_each with in-place increment.
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
    }
    timer.finish();
    auto time_for_each = timer.ms();
    std::cout << time_for_each << "<- std::for_each (" << arr[rand()%arr.size()] << ")\n";
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Candidate: split the array into num_threads equal chunks, one pool task each.
    // NOTE(review): relies on arr.size() being divisible by num_threads
    // (1000000 / 8 = 125000); otherwise the tail elements would be skipped.
    enum { num_threads = 8 };
    thread_pool pool(num_threads);
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::array< std::future<void>, num_threads > tasks;
        for (int t = 0; t < num_threads; ++t) {
            tasks[t] = pool.run_task([&,t]{
                std::for_each( arr.begin()+(arr.size()/num_threads)*t, arr.begin()+(arr.size()/num_threads)*(t+1), [](double& x){++x;} );
            });
        }
        // std::cout << "loop! -- " << pool.threads_active() << "/" << pool.total_threads() << std::endl;
        for (int t = 0; t < num_threads; ++t)
            tasks[t].wait();
    }
    timer.finish();
    auto time_pool = timer.ms();
    std::cout << time_pool << "<- thread_pool (" << arr[rand()%arr.size()] << ")\n";
}
模板
结构线程队列{
使用lock=std::unique_lock;
无效推回(T){
{
锁l(m);
数据。推回(std::move(t));
}
cv.通知_one();
}
boost::可选的pop_front(){
锁l(m);
wait(l,[this]{return abort | | |!data.empty();});
if(abort)返回{};
auto r=std::move(data.back());
data.pop_back();
返回std::move(r);
}
无效终止(){
{
锁l(m);
中止=真;
data.clear();
}
cv.通知所有人();
}
~u队列()
{
终止();
}
私人:
std::互斥m;
std::deque数据;
std::条件变量cv;
bool abort=false;
};
结构线程池{
线程池(std::size_t n=1){start_线程(n);}
线程池(线程池&&)=删除;
线程池&运算符=(线程池&&)=删除;
~thread_pool()=default;//或`{terminate();}`如果要放弃某些任务
模板
std::未来队列任务(F任务){
std::打包任务p(std::移动(任务));
自动r=p。获取未来();
任务。推回(std::move(p));
返回r;
}
模板
std::未来运行任务(F任务){
如果(线程数\活动线程数()>=总线程数()){
启动_线程();
}
返回队列_任务(std::move(task));
}
无效终止(){
tasks.terminate();
}
std::size\u t threads\u active()常量{
主动返回;
}
std::size\u t总线程数()常量{
返回线程。size();
}
无效清除_线程(){
终止();
线程。清除();
}
无效起始螺纹(标准::尺寸n=1){
而(n-->0){
把你推回去(
std::async(std::launch::async,
[本]{
while(auto task=tasks.pop_front()){
++活跃的;
试一试{
(*任务)();
}捕获(…){
--活跃的;
投掷;
}
--活跃的;
}
}
)
);
}
}
私人:
向量线程;
线程队列任务;
std::原子活动={};
};
构造我的计时器{
std::chrono::高分辨率时钟::时间点优先;
标准::时钟::高分辨率时钟::持续时间;
void start(){
first=std::chrono::高分辨率时钟::now();
}
标准::时钟::高分辨率时钟::持续时间完成(){
返回持续时间=std::chrono::高分辨率时钟::now()-第一;
}
无符号长ms()常量{
返回std::chrono::duration_cast(duration).count();
}
};
int main(){
std::向量arr(1000000,0);
我的定时器;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
timer.start();
对于(int i=0;i<100;++i){
std::transform(arr.begin(),arr.end(),arr.begin(),[](双x){return++x;});
}
timer.finish();
自动时间转换=timer.ms();

你为什么期望加速?你在每个循环迭代中都在等待未来。我不是这个领域的专家,但我期望map()启动4个线程,这将使此代码行比STL函数完成得更快。或者我误解了此函数的概念吗?您如何测试
QtConcurrent
的开销?请注意,您将
++
操作分组为
num_Threads
批。您也可以使用
QtConcurrent
执行此操作。@m7913d这就是
QtConcurrent
应该做的事情;根据硬件线程的数量启动一些子线程来处理部分任务。我只是手动完成了。我在
上得到了显著的加速,每个
QtConcurrent
都将每个操作分配给一个线程(考虑最大并发线程数)。它不会对它们进行分组。请注意,如果各操作的耗时可能不同,则无法直接正确地对操作进行分组。@m7913d 哦,这太糟糕了。我错了,Qt 会根据用户执行时间和调度开销做一定的分组:它从每批一个操作开始,每当开销相对于用户执行时间过大时,就把批量加倍。因此,在达到较大的批量之前需要多次尝试,从而产生大量开销。
181 ms // std::for_each
163 ms // QtConcurrent::run
// A minimal thread-safe FIFO work queue: producers push_back, consumers
// block in pop_front until work arrives or terminate() is called.
template<class T>
struct threaded_queue {
  using lock = std::unique_lock<std::mutex>;
  // Enqueue an element and wake one waiting consumer.
  void push_back( T t ) {
    {
      lock l(m);
      data.push_back(std::move(t));
    }
    cv.notify_one();
  }
  // Block until an element is available or the queue has been terminated.
  // Returns an empty optional after terminate() — consumers use that as
  // their shutdown signal.
  boost::optional<T> pop_front() {
    lock l(m);
    cv.wait(l, [this]{ return abort || !data.empty(); } );
    if (abort) return {};
    // Fix: the original took data.back()/pop_back(), which made the queue
    // LIFO despite its push_back/pop_front naming (tasks could starve).
    // Take from the front so ordering is genuinely first-in, first-out.
    auto r = std::move(data.front());
    data.pop_front();
    return std::move(r);
  }
  // Wake all waiters, discard pending work, and make every future pop
  // return an empty optional.
  void terminate() {
    {
      lock l(m);
      abort = true;
      data.clear();
    }
    cv.notify_all();
  }
  ~threaded_queue()
  {
    terminate();
  }
private:
  std::mutex m;                 // guards data and abort
  std::deque<T> data;           // pending elements, FIFO order
  std::condition_variable cv;   // signaled on push and on terminate
  bool abort = false;           // set once by terminate(); never cleared
};
// A tiny grow-on-demand thread pool. Workers are std::async futures that
// loop over a shared threaded_queue of type-erased packaged_tasks.
struct thread_pool {
  // Start with n worker threads (default 1).
  thread_pool( std::size_t n = 1 ) { start_thread(n); }
  thread_pool( thread_pool&& ) = delete;
  thread_pool& operator=( thread_pool&& ) = delete;
  ~thread_pool() = default; // or `{ terminate(); }` if you want to abandon some tasks
  // Enqueue a task for execution; returns a future for its result.
  // R is the result of invoking F with no arguments.
  template<class F, class R=std::result_of_t<F&()>>
  std::future<R> queue_task( F task ) {
    std::packaged_task<R()> p(std::move(task));
    auto r = p.get_future();
    tasks.push_back( std::move(p) );
    return r;
  }
  // Like queue_task, but spawns one extra worker when all current workers
  // appear busy. NOTE(review): `active` is incremented only after a task is
  // popped, so this count can lag briefly and over- or under-spawn.
  template<class F, class R=std::result_of_t<F&()>>
  std::future<R> run_task( F task ) {
    if (threads_active() >= total_threads()) {
      start_thread();
    }
    return queue_task( std::move(task) );
  }
  // Ask all workers to stop; unstarted tasks are discarded by the queue.
  void terminate() {
    tasks.terminate();
  }
  // Number of workers currently running a task (approximate; see run_task).
  std::size_t threads_active() const {
    return active;
  }
  // Total number of workers ever started.
  std::size_t total_threads() const {
    return threads.size();
  }
  // Stop the queue, then join every worker: the std::async future
  // destructors block until their threads return.
  void clear_threads() {
    terminate();
    threads.clear();
  }
  // Launch n additional workers. Each worker pops tasks until pop_front
  // returns an empty optional (i.e. the queue was terminated).
  void start_thread( std::size_t n = 1 ) {
    while(n-->0) {
      threads.push_back(
        std::async( std::launch::async,
          [this]{
            while(auto task = tasks.pop_front()) {
              ++active;
              try{
                // packaged_task stores any exception in its future, so this
                // call itself does not propagate task exceptions.
                (*task)();
              } catch(...) {
                --active;
                throw;
              }
              --active;
            }
          }
        )
      );
    }
  }
private:
  std::vector<std::future<void>> threads;           // worker handles; dtors join
  threaded_queue<std::packaged_task<void()>> tasks; // pending, type-erased work
  std::atomic<std::size_t> active = {};             // busy-worker counter
};

// Simple stopwatch over std::chrono::high_resolution_clock:
// call start(), then finish(), then read ms().
struct my_timer_t {
    std::chrono::high_resolution_clock::time_point first;
    std::chrono::high_resolution_clock::duration duration;

    // Record the starting instant.
    void start() {
        first = std::chrono::high_resolution_clock::now();
    }
    // Stop the clock; stores and also returns the elapsed duration.
    std::chrono::high_resolution_clock::duration finish() {
        duration = std::chrono::high_resolution_clock::now() - first;
        return duration;
    }
    // Elapsed time of the last start()/finish() pair, in whole milliseconds.
    unsigned long long ms() const {
        using std::chrono::duration_cast;
        using std::chrono::milliseconds;
        return duration_cast<milliseconds>(duration).count();
    }
};
// Benchmark: 100 passes of "+1 over 1M doubles" via std::transform,
// std::for_each, and an 8-way split over the hand-rolled thread_pool.
int main() {
    std::vector<double> arr(1000000, 0);
    my_timer_t timer;

    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Baseline 1: std::transform (copy element, increment, write back).
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::transform(arr.begin(), arr.end(), arr.begin(), [](double x){ return ++x; });
    }
    timer.finish();
    auto time_transform = timer.ms();
    // arr[rand()%...] is printed so the compiler cannot optimize the loops away.
    std::cout << time_transform << "<- std::transform (" << arr[rand()%arr.size()] << ")\n";
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Baseline 2: std::for_each with in-place increment.
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
    }
    timer.finish();
    auto time_for_each = timer.ms();
    std::cout << time_for_each << "<- std::for_each (" << arr[rand()%arr.size()] << ")\n";
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    // Candidate: split the array into num_threads equal chunks, one pool task each.
    // NOTE(review): relies on arr.size() being divisible by num_threads
    // (1000000 / 8 = 125000); otherwise the tail elements would be skipped.
    enum { num_threads = 8 };
    thread_pool pool(num_threads);
    timer.start();
    for(int i = 0; i < 100; ++i) {
        std::array< std::future<void>, num_threads > tasks;
        for (int t = 0; t < num_threads; ++t) {
            tasks[t] = pool.run_task([&,t]{
                std::for_each( arr.begin()+(arr.size()/num_threads)*t, arr.begin()+(arr.size()/num_threads)*(t+1), [](double& x){++x;} );
            });
        }
        // std::cout << "loop! -- " << pool.threads_active() << "/" << pool.total_threads() << std::endl;
        for (int t = 0; t < num_threads; ++t)
            tasks[t].wait();
    }
    timer.finish();
    auto time_pool = timer.ms();
    std::cout << time_pool << "<- thread_pool (" << arr[rand()%arr.size()] << ")\n";
}
153<- std::transform (100)
131<- std::for_each (200)
82<- thread_pool (300)