C++ QtConcurrent::map没有任何好处
我想使用C++ QtConcurrent::map没有任何好处,c++,multithreading,qt,qtconcurrent,C++,Multithreading,Qt,Qtconcurrent,我想使用QtConcurrent::map函数操作QVector。我的示例程序所做的就是将QVector中的所有值增加1 QVector<double> arr(10000000, 0); QElapsedTimer timer; qDebug() << QThreadPool::globalInstance()->maxThreadCount() << "Threads"; int end; /* * * * * * * * * * * * * *
QtConcurrent::map
函数操作QVector
。我的示例程序所做的就是将QVector
中的所有值增加1
// Benchmark fragment (excerpt from a larger function body): increments every
// element of a 10-million-double QVector 100 times, three different ways,
// timing each with QElapsedTimer.
QVector<double> arr(10000000, 0);
QElapsedTimer timer;
// Report how many worker threads the global Qt pool will use.
qDebug() << QThreadPool::globalInstance()->maxThreadCount() << "Threads";
int end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 1: single-threaded std::transform (lambda takes x by value,
// so ++x returns x+1 without touching the source element directly).
timer.start();
for(int i = 0; i < 100; ++i) {
std::transform(arr.begin(), arr.end(), arr.begin(), [](double x){ return ++x; });
}
end = timer.elapsed();
qDebug() << end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 2: single-threaded std::for_each mutating in place.
timer.start();
for(int i = 0; i < 100; ++i) {
std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
}
end = timer.elapsed();
qDebug() << end;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 3: QtConcurrent::map over the same range; waitForFinished()
// blocks inside the timed loop, so per-iteration dispatch overhead is
// included in the measurement.
// NOTE(review): the per-element work here is a single increment, so the
// loop is presumably memory-bandwidth-bound — that would explain the small
// multithreading gain reported below; confirm with a profiler.
timer.start();
for(int i = 0; i < 100; ++i) {
QFuture<void> qf = QtConcurrent::map(arr.begin(), arr.end(), [](double &x){ ++x; });
qf.waitForFinished();
}
end = timer.elapsed();
qDebug() << end;
因此,多线程版本几乎没有速度优势。我验证了实际上有4个线程正在运行。我使用-O2优化。更常见的QThreadPool
方法是否更适合这种情况
编辑:
我使用QtConcurrent::run()
尝试了不同的方法。以下是程序代码的相关部分:
// Adds one to every element in the half-open iterator range [first, last).
void add1(QVector<double>::iterator first, QVector<double>::iterator last) {
    while (first != last) {
        *first += 1;
        ++first;
    }
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Single-threaded baseline: increment every element in place.
std::for_each(arr.begin(), arr.end(), [](double &x){ ++x; });
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Launch numThreads tasks, each incrementing one contiguous slice of arr.
QFuture<void> qf[numThreads];
for(int j = 0; j < numThreads; ++j) {
// Slice j is [j*n/numThreads, (j+1)*n/numThreads). The boundary values
// j*n/numThreads for j = 0..numThreads partition [0, n) exactly, so every
// element is processed once even when n is not divisible by numThreads.
// BUG FIX: the original passed arr.begin()+(j+1)*n/numThreads-1 as the end
// iterator; since add1 treats [first, last) as half-open, that silently
// skipped the last element of every slice.
qf[j] = QtConcurrent::run(add1, arr.begin()+j*n/numThreads, arr.begin()+(j+1)*n/numThreads);
}
// Block until every slice has finished before the timing stops.
for(int j = 0; j < numThreads; ++j) {
qf[j].waitForFinished();
}
即便如此,与直接使用 C++ 线程原语并自己实现一个线程池相比,它的开销仍然很高。
// Minimal thread-safe producer/consumer queue.
// push_back() enqueues; pop_front() blocks until an item arrives or
// terminate() is called, after which it returns an empty optional.
template<class T>
struct threaded_queue {
  using lock = std::unique_lock<std::mutex>;
  // Enqueue one item and wake a single waiting consumer.
  void push_back( T t ) {
    {
      lock l(m);
      data.push_back(std::move(t));
    }
    // Notify outside the lock so the woken consumer doesn't immediately block.
    cv.notify_one();
  }
  // Block until an item is available or the queue was terminated.
  // Returns an empty optional after terminate(); otherwise the OLDEST item.
  boost::optional<T> pop_front() {
    lock l(m);
    cv.wait(l, [this]{ return abort || !data.empty(); } );
    if (abort) return {};
    // BUG FIX: the original took data.back()/pop_back(), i.e. LIFO (stack)
    // order, contradicting the function's name. Take the front for FIFO;
    // std::deque makes both ends O(1).
    auto r = std::move(data.front());
    data.pop_front();
    return std::move(r);
  }
  // Wake all waiters, discard pending items, and make future pops return empty.
  void terminate() {
    {
      lock l(m);
      abort = true;
      data.clear();
    }
    cv.notify_all();
  }
  ~threaded_queue()
  {
    terminate();
  }
private:
  std::mutex m;
  std::deque<T> data;
  std::condition_variable cv;
  bool abort = false;
};
// Demo thread pool: workers are std::async futures that loop over a
// threaded_queue of type-erased std::packaged_task<void()> jobs.
struct thread_pool {
// Start with n worker threads (default 1).
thread_pool( std::size_t n = 1 ) { start_thread(n); }
// Non-movable: workers capture `this`, so the pool must stay at one address.
thread_pool( thread_pool&& ) = delete;
thread_pool& operator=( thread_pool&& ) = delete;
~thread_pool() = default; // or `{ terminate(); }` if you want to abandon some tasks
// Enqueue `task`; returns a future for its result. The packaged_task<R()>
// is itself wrapped inside the queue's packaged_task<void()>, so invoking
// the outer task runs the inner one, which fulfils the future returned here.
template<class F, class R=std::result_of_t<F&()>>
std::future<R> queue_task( F task ) {
std::packaged_task<R()> p(std::move(task));
auto r = p.get_future();
tasks.push_back( std::move(p) );
return r;
}
// Like queue_task, but grows the pool when all current workers look busy.
// NOTE(review): threads_active() is a racy snapshot, so this may slightly
// over- or under-provision threads — acceptable for a benchmark pool.
template<class F, class R=std::result_of_t<F&()>>
std::future<R> run_task( F task ) {
if (threads_active() >= total_threads()) {
start_thread();
}
return queue_task( std::move(task) );
}
// Abort queued tasks and tell workers to exit their pop loops.
void terminate() {
tasks.terminate();
}
// Number of workers currently running a task (approximate, racy read).
std::size_t threads_active() const {
return active;
}
// Total workers ever started.
std::size_t total_threads() const {
return threads.size();
}
// Stop everything; clearing the futures blocks until each worker returns.
void clear_threads() {
terminate();
threads.clear();
}
// Launch n additional workers. Each loops popping tasks until pop_front()
// returns an empty optional (i.e. the queue was terminated).
void start_thread( std::size_t n = 1 ) {
while(n-->0) {
threads.push_back(
std::async( std::launch::async,
[this]{
while(auto task = tasks.pop_front()) {
++active;
try{
(*task)();
} catch(...) {
// Keep the active count consistent even if a task throws.
--active;
throw;
}
--active;
}
}
)
);
}
}
private:
std::vector<std::future<void>> threads;
threaded_queue<std::packaged_task<void()>> tasks;
std::atomic<std::size_t> active = {};
};
// Tiny stopwatch: start() marks an instant, finish() stores the elapsed
// interval, ms() reports it in whole milliseconds.
// NOTE(review): high_resolution_clock is not guaranteed steady; for interval
// timing std::chrono::steady_clock would be the safer choice — confirm intent.
struct my_timer_t {
std::chrono::high_resolution_clock::time_point first;
std::chrono::high_resolution_clock::duration duration;
// Record the starting instant.
void start() {
first = std::chrono::high_resolution_clock::now();
}
// Store and return the elapsed time since the last start().
std::chrono::high_resolution_clock::duration finish() {
duration = std::chrono::high_resolution_clock::now() - first;
return duration;
}
// Last measured interval, truncated to milliseconds.
unsigned long long ms() const {
const auto whole_ms = std::chrono::duration_cast<std::chrono::milliseconds>(duration);
return whole_ms.count();
}
};
// Benchmark: bump 1e6 doubles by one, 100 passes each, three ways —
// std::transform, std::for_each, and an 8-thread pool — printing the wall
// time (ms) and a random element value after each variant.
int main() {
std::vector<double> arr(1000000, 0);
my_timer_t timer;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 1: copy-in/copy-out transform (v+1 is written back over v).
timer.start();
for(int pass = 0; pass != 100; ++pass) {
std::transform(arr.begin(), arr.end(), arr.begin(), [](double v){ return v + 1; });
}
timer.finish();
const auto time_transform = timer.ms();
std::cout << time_transform << "<- std::transform (" << arr[rand()%arr.size()] << ")\n";
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 2: in-place for_each.
timer.start();
for(int pass = 0; pass != 100; ++pass) {
std::for_each(arr.begin(), arr.end(), [](double &v){ ++v; });
}
timer.finish();
const auto time_for_each = timer.ms();
std::cout << time_for_each << "<- std::for_each (" << arr[rand()%arr.size()] << ")\n";
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 3: split arr into num_threads equal slices and run each on the pool.
enum { num_threads = 8 };
thread_pool pool(num_threads);
const std::size_t chunk = arr.size() / num_threads;
timer.start();
for(int pass = 0; pass != 100; ++pass) {
std::array< std::future<void>, num_threads > tasks;
for (int t = 0; t != num_threads; ++t) {
const auto slice_begin = arr.begin() + chunk * t;
const auto slice_end   = slice_begin + chunk;
tasks[t] = pool.run_task([slice_begin, slice_end]{
std::for_each( slice_begin, slice_end, [](double& v){ ++v; } );
});
}
// Wait for all slices before starting the next pass.
for (auto& f : tasks)
f.wait();
}
timer.finish();
const auto time_pool = timer.ms();
std::cout << time_pool << "<- thread_pool (" << arr[rand()%arr.size()] << ")\n";
}
模板
结构线程队列{
使用lock=std::unique_lock;
无效推回(T){
{
锁l(m);
数据。推回(std::move(t));
}
cv.通知_one();
}
boost::可选的pop_front(){
锁l(m);
wait(l,[this]{return abort | | |!data.empty();});
if(abort)返回{};
auto r=std::move(data.back());
data.pop_back();
返回std::move(r);
}
无效终止(){
{
锁l(m);
中止=真;
data.clear();
}
cv.通知所有人();
}
~u队列()
{
终止();
}
私人:
std::互斥m;
std::deque数据;
std::条件变量cv;
bool abort=false;
};
结构线程池{
线程池(std::size_t n=1){start_线程(n);}
线程池(线程池&&)=删除;
线程池&运算符=(线程池&&)=删除;
~thread_pool()=default;//或`{terminate();}`如果要放弃某些任务
模板
std::未来队列任务(F任务){
std::打包任务p(std::移动(任务));
自动r=p。获取未来();
任务。推回(std::move(p));
返回r;
}
模板
std::未来运行任务(F任务){
如果(线程数\活动线程数()>=总线程数()){
启动_线程();
}
返回队列_任务(std::move(task));
}
无效终止(){
tasks.terminate();
}
std::size\u t threads\u active()常量{
主动返回;
}
std::size\u t总线程数()常量{
返回线程。size();
}
无效清除_线程(){
终止();
线程。清除();
}
无效起始螺纹(标准::尺寸n=1){
而(n-->0){
把你推回去(
std::async(std::launch::async,
[本]{
while(auto task=tasks.pop_front()){
++活跃的;
试一试{
(*任务)();
}捕获(…){
--活跃的;
投掷;
}
--活跃的;
}
}
)
);
}
}
私人:
向量线程;
线程队列任务;
std::原子活动={};
};
构造我的计时器{
std::chrono::高分辨率时钟::时间点优先;
标准::时钟::高分辨率时钟::持续时间;
void start(){
first=std::chrono::高分辨率时钟::now();
}
标准::时钟::高分辨率时钟::持续时间完成(){
返回持续时间=std::chrono::高分辨率时钟::now()-第一;
}
无符号长ms()常量{
返回std::chrono::duration_cast(duration).count();
}
};
int main(){
std::向量arr(1000000,0);
我的定时器;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
timer.start();
对于(int i=0;i<100;++i){
std::transform(arr.begin(),arr.end(),arr.begin(),[](双x){return++x;});
}
timer.finish();
自动时间转换=timer.ms();
你为什么期望加速?你在每个循环迭代中都在等待未来。我不是这个领域的专家,但我期望map()启动4个线程,这将使此代码行比STL函数完成得更快。或者我误解了此函数的概念吗?您如何测试QtConcurrent
的开销?请注意,您将++
操作分组为num_Threads
批。您也可以使用QtConcurrent
执行此操作。@m7913d这就是QtConcurrent
应该做的事情;根据硬件线程的数量启动一些子线程来处理部分任务。我只是手动完成了。我在上得到了显著的加速,每个QtConcurrent
都将每个操作分配给一个线程(考虑最大并发线程数)。它不会对它们进行分组。请注意,如果操作花费的时间可能不相同,则不能直接正确地对操作进行分组。@m7913d哦,这太糟糕了。我错了,Qt通过查看用户和开销时间来实现一些分组。它从每批一个操作开始,每次开销比以前大时,它都会将其加倍e用户执行时间。因此,在使用大批量之前需要进行大量试验,因此会产生大量开销。
181 ms // std::for_each
163 ms // QtConcurrent::run
// Minimal thread-safe producer/consumer queue.
// push_back() enqueues; pop_front() blocks until an item arrives or
// terminate() is called, after which it returns an empty optional.
template<class T>
struct threaded_queue {
  using lock = std::unique_lock<std::mutex>;
  // Enqueue one item and wake a single waiting consumer.
  void push_back( T t ) {
    {
      lock l(m);
      data.push_back(std::move(t));
    }
    // Notify outside the lock so the woken consumer doesn't immediately block.
    cv.notify_one();
  }
  // Block until an item is available or the queue was terminated.
  // Returns an empty optional after terminate(); otherwise the OLDEST item.
  boost::optional<T> pop_front() {
    lock l(m);
    cv.wait(l, [this]{ return abort || !data.empty(); } );
    if (abort) return {};
    // BUG FIX: the original took data.back()/pop_back(), i.e. LIFO (stack)
    // order, contradicting the function's name. Take the front for FIFO;
    // std::deque makes both ends O(1).
    auto r = std::move(data.front());
    data.pop_front();
    return std::move(r);
  }
  // Wake all waiters, discard pending items, and make future pops return empty.
  void terminate() {
    {
      lock l(m);
      abort = true;
      data.clear();
    }
    cv.notify_all();
  }
  ~threaded_queue()
  {
    terminate();
  }
private:
  std::mutex m;
  std::deque<T> data;
  std::condition_variable cv;
  bool abort = false;
};
// Demo thread pool: workers are std::async futures that loop over a
// threaded_queue of type-erased std::packaged_task<void()> jobs.
struct thread_pool {
// Start with n worker threads (default 1).
thread_pool( std::size_t n = 1 ) { start_thread(n); }
// Non-movable: workers capture `this`, so the pool must stay at one address.
thread_pool( thread_pool&& ) = delete;
thread_pool& operator=( thread_pool&& ) = delete;
~thread_pool() = default; // or `{ terminate(); }` if you want to abandon some tasks
// Enqueue `task`; returns a future for its result. The packaged_task<R()>
// is itself wrapped inside the queue's packaged_task<void()>, so invoking
// the outer task runs the inner one, which fulfils the future returned here.
template<class F, class R=std::result_of_t<F&()>>
std::future<R> queue_task( F task ) {
std::packaged_task<R()> p(std::move(task));
auto r = p.get_future();
tasks.push_back( std::move(p) );
return r;
}
// Like queue_task, but grows the pool when all current workers look busy.
// NOTE(review): threads_active() is a racy snapshot, so this may slightly
// over- or under-provision threads — acceptable for a benchmark pool.
template<class F, class R=std::result_of_t<F&()>>
std::future<R> run_task( F task ) {
if (threads_active() >= total_threads()) {
start_thread();
}
return queue_task( std::move(task) );
}
// Abort queued tasks and tell workers to exit their pop loops.
void terminate() {
tasks.terminate();
}
// Number of workers currently running a task (approximate, racy read).
std::size_t threads_active() const {
return active;
}
// Total workers ever started.
std::size_t total_threads() const {
return threads.size();
}
// Stop everything; clearing the futures blocks until each worker returns.
void clear_threads() {
terminate();
threads.clear();
}
// Launch n additional workers. Each loops popping tasks until pop_front()
// returns an empty optional (i.e. the queue was terminated).
void start_thread( std::size_t n = 1 ) {
while(n-->0) {
threads.push_back(
std::async( std::launch::async,
[this]{
while(auto task = tasks.pop_front()) {
++active;
try{
(*task)();
} catch(...) {
// Keep the active count consistent even if a task throws.
--active;
throw;
}
--active;
}
}
)
);
}
}
private:
std::vector<std::future<void>> threads;
threaded_queue<std::packaged_task<void()>> tasks;
std::atomic<std::size_t> active = {};
};
// Tiny stopwatch: start() marks an instant, finish() stores the elapsed
// interval, ms() reports it in whole milliseconds.
// NOTE(review): high_resolution_clock is not guaranteed steady; for interval
// timing std::chrono::steady_clock would be the safer choice — confirm intent.
struct my_timer_t {
std::chrono::high_resolution_clock::time_point first;
std::chrono::high_resolution_clock::duration duration;
// Record the starting instant.
void start() {
first = std::chrono::high_resolution_clock::now();
}
// Store and return the elapsed time since the last start().
std::chrono::high_resolution_clock::duration finish() {
duration = std::chrono::high_resolution_clock::now() - first;
return duration;
}
// Last measured interval, truncated to milliseconds.
unsigned long long ms() const {
const auto whole_ms = std::chrono::duration_cast<std::chrono::milliseconds>(duration);
return whole_ms.count();
}
};
// Benchmark: bump 1e6 doubles by one, 100 passes each, three ways —
// std::transform, std::for_each, and an 8-thread pool — printing the wall
// time (ms) and a random element value after each variant.
int main() {
std::vector<double> arr(1000000, 0);
my_timer_t timer;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 1: copy-in/copy-out transform (v+1 is written back over v).
timer.start();
for(int pass = 0; pass != 100; ++pass) {
std::transform(arr.begin(), arr.end(), arr.begin(), [](double v){ return v + 1; });
}
timer.finish();
const auto time_transform = timer.ms();
std::cout << time_transform << "<- std::transform (" << arr[rand()%arr.size()] << ")\n";
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 2: in-place for_each.
timer.start();
for(int pass = 0; pass != 100; ++pass) {
std::for_each(arr.begin(), arr.end(), [](double &v){ ++v; });
}
timer.finish();
const auto time_for_each = timer.ms();
std::cout << time_for_each << "<- std::for_each (" << arr[rand()%arr.size()] << ")\n";
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Variant 3: split arr into num_threads equal slices and run each on the pool.
enum { num_threads = 8 };
thread_pool pool(num_threads);
const std::size_t chunk = arr.size() / num_threads;
timer.start();
for(int pass = 0; pass != 100; ++pass) {
std::array< std::future<void>, num_threads > tasks;
for (int t = 0; t != num_threads; ++t) {
const auto slice_begin = arr.begin() + chunk * t;
const auto slice_end   = slice_begin + chunk;
tasks[t] = pool.run_task([slice_begin, slice_end]{
std::for_each( slice_begin, slice_end, [](double& v){ ++v; } );
});
}
// Wait for all slices before starting the next pass.
for (auto& f : tasks)
f.wait();
}
timer.finish();
const auto time_pool = timer.ms();
std::cout << time_pool << "<- thread_pool (" << arr[rand()%arr.size()] << ")\n";
}
153<- std::transform (100)
131<- std::for_each (200)
82<- thread_pool (300)