C++ 如何使用多线程将矩阵提升为幂?
我正在尝试将矩阵提升到多线程的能力,但我对线程不是很在行。另外,我从键盘输入线程数,该数字在[1,矩阵高度]范围内,然后执行以下操作:C++ 如何使用多线程将矩阵提升为幂?,c++,multithreading,c++11,matrix,stdthread,C++,Multithreading,C++11,Matrix,Stdthread,我正在尝试将矩阵提升到多线程的能力,但我对线程不是很在行。另外,我从键盘输入线程数,该数字在[1,矩阵高度]范围内,然后执行以下操作: unsigned period = ceil((double)A.getHeight() / threadNum); unsigned prev = 0, next = period; for (unsigned i(0); i < threadNum; ++i) { threads.emplace_back(&power<l
// Rows handed to each thread, rounded up so that threadNum bands cover every row of A.
unsigned period = ceil(static_cast<double>(A.getHeight()) / threadNum);
// prev/next delimit the row band given to the thread about to be launched
// (presumably a half-open range [prev, next) -- confirm against power()).
unsigned prev = 0, next = period;
for (unsigned i = 0; i < threadNum; ++i) {
    threads.emplace_back(&power<long long>, std::ref(result), std::ref(A), std::ref(B), prev, next, p);
    // The next band always starts where the current one ended ...
    prev = next;
    // ... and ends one period later, clamped to the matrix height.
    if (next + period > A.getHeight())
        next = A.getHeight();
    else
        next += period;
}
unsigned period=ceil((double)A.getHeight()/threadNum);
无符号上一个=0,下一个=period;
for(无符号i(0);iA.getHeight()){
上一个=下一个;
next=A.getHeight();
}
否则{
上一个=下一个;
下一个+=周期;
}
}
对我来说,用多个线程将一个矩阵乘以另一个矩阵很容易,但这里的问题是:例如,我需要计算A的3次幂,A^2是其中的一步;在这一步完成之后,我必须等待所有线程都结束,然后才能继续计算A^2*A。我该如何让我的线程等待呢?我使用的是std::thread。
在第一次回复发布后,我意识到我忘了提及我只想创建一次这些线程,而不是为每个乘法步骤重新创建它们。我将从一个简单的分解开始:
- 矩阵乘法是多线程的
- 矩阵指数多次调用乘法
// Placeholder for the multi-threaded matrix product of `left` and `right`.
// The body is deliberately elided ("{...}") in this answer, so this line is
// pseudo-code and will not compile as written.
Mat multithreaded_multiply(Mat const& left, Mat const& right) {...}
// Raises M to the n-th power by repeated multiplication.
//
// Each step delegates to multithreaded_multiply(), so the threading is fully
// contained in the multiplication and every step completes before the next
// one starts.
//
// @param M  matrix to exponentiate
// @param n  exponent; for n <= 1 the loop does not run and a copy of M is
//           returned. NOTE: n == 0 should yield the identity matrix, which
//           cannot be built here without knowing Mat's interface.
// @return   M multiplied by itself n - 1 times
Mat power(Mat const& M, int n)
{
    // Handle degenerate cases here (n = 0, 1)
    // Regular loop
    Mat intermediate = M;
    for (int i = 2; i <= n; ++i)
    {
        intermediate = multithreaded_multiply(M, intermediate);
    }
    // The original fell off the end of a non-void function (undefined
    // behaviour); the accumulated product has to be returned explicitly.
    return intermediate;
}
Mat多线程乘法(Mat const&left,Mat const&right){…}
Mat电源(Mat const&M,int n)
{
//在此处处理退化情况(n=0,1)
//正则循环
Mat中间值=M;
对于(inti=2;i我建议使用
算法如下所示:
将矩阵拆分为N个部分,用于N个线程
每个线程计算一次乘法所需的子矩阵
然后,它使用fetch_add递增一个原子计数器threads_finished,并等待一个共享的条件变量
完成的最后一个线程(fetch_add()+1==thread count)通知所有线程现在可以继续处理
利润
编辑:
下面是如何停止线程的示例:
#include <iostream>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <vector>
#include <algorithm>
#include <atomic>
// Reusable rendezvous point for a fixed group of threads.
//
// Each caller raises its own flag in `threads` while holding `mut`. The caller
// that finds every flag raised clears all of them and wakes the others; every
// other caller sleeps on `cv` until its own flag has been cleared.
//
// @param cv       condition variable shared by the whole group
// @param mut      mutex guarding `threads`
// @param threads  one arrival flag per participant (0 = not arrived)
// @param idx      index of the calling thread's flag in `threads`
void sync_threads(std::condition_variable & cv, std::mutex & mut, std::vector<int> & threads, const int idx) {
    std::unique_lock<std::mutex> guard(mut);
    threads[idx] = 1;

    const bool everyone_arrived =
        std::all_of(threads.begin(), threads.end(), [](int flag) { return flag != 0; });

    if (!everyone_arrived) {
        // Only this thread's own flag is tested, so a fast thread that has
        // already re-entered the next round cannot keep this one asleep.
        cv.wait(guard, [&threads, idx] { return threads[idx] == 0; });
        return;
    }

    // Last one in: reset every flag for the next round and release the group.
    std::fill(threads.begin(), threads.end(), 0);
    cv.notify_all();
}
// Demo: starts max_threads workers that meet at sync_threads() twice, so all
// "started" lines are printed before any "Continuing" line, and all of those
// before any "second time" line.
int main(){
    std::vector<std::thread> threads;
    std::mutex mut;
    std::condition_variable cv;
    int max_threads = 10;
    std::vector<int> thread_wait(max_threads,0);

    // Body shared by every worker; `id` is the worker's slot in thread_wait.
    // Each message is built as one string so a line is emitted with a single <<.
    auto worker = [&](int id) {
        const std::string tag = std::to_string(id);
        std::cout << "Thread " + tag + " started\n";
        sync_threads(cv, mut, thread_wait, id);
        std::cout << "Continuing thread " + tag + "\n";
        sync_threads(cv, mut, thread_wait, id);
        std::cout << "Continuing thread for second time " + tag + "\n";
    };

    for (int id = 0; id != max_threads; ++id)
        threads.emplace_back(worker, id);

    for (std::thread & t : threads)
        t.join();
}
#包括
#包括
#包括
#包括
#包括
#包括
#包括
void sync_线程(std::condition_变量&cv、std::mutex&mut、std::vector&threads、const int idx){
std::唯一锁定(mut);
线程[idx]=1;
if(std::find(threads.begin(),threads.end(),0)=threads.end()){
用于(自动和i:线程)
i=0;
cv.通知所有人();
}否则{
while(线程[idx])
cv.等待(锁定);
}
}
int main(){
向量线程;
std::互斥mut;
std::条件变量cv;
int max_线程=10;
std::向量线程等待(最大线程数,0);
对于(int i=0;i std::cout不是编程答案,而是数学答案:对于每个方阵,都有一组所谓的“特征值”和“特征向量”,满足M*E_i=lambda_i*E_i。其中M是矩阵,E_i是特征向量,lambda_i是特征值,它只是一个复数。因此M^n*E_i=lambda_i^n*E_i,所以你只需要计算复数的n次方,而不是矩阵的n次方。特征向量是正交的,即任何向量都可以写成V=sum_i a_i*E_i。所以M^n*V=sum_i a_i*lambda_i^n*E_i。
根据您的问题,这可能会显著加快速度。这里有一个mass_thread_pool:
// launches n threads all doing task F with an index:
// Launches n threads that all run the same task F, each with its own index.
//
// Usage: construct with the task, call kick(n) to start a round, wait on the
// returned future, then call kick(n) again for the next round. The worker
// threads are created once and reused; the destructor stops and joins them.
// A new round must not be kicked before the previous round's future is ready.
template<class F>
struct mass_thread_pool {
  F f;                                            // task prototype; every worker gets its own copy
  std::vector< std::thread > threads;             // workers, created lazily by kick()
  std::condition_variable cv;                     // signals "new round" and "shut down"
  std::mutex m;                                   // guards task_id, finished_count, task_done, threads
  size_t task_id = 0;                             // number of the round currently requested
  size_t finished_count = 0;                      // workers that completed the current round
  std::unique_ptr<std::promise<void>> task_done;  // fulfilled when every worker finished the round
  std::atomic<bool> finished;                     // set by the destructor to stop the workers

  // Worker loop: run f(n), report completion, then sleep until the next round
  // or until shutdown. cur_task is the round this worker has already picked up.
  void task( F f, size_t n, size_t cur_task ) {
    do {
      f(n);
      std::unique_lock<std::mutex> lock(m);
      if (finished)
        break;
      ++finished_count;
      if (finished_count == threads.size())
      {
        // Last worker of the round: release whoever waits on the future.
        task_done->set_value();
        finished_count = 0;
      }
      // Wake on shutdown, or when kick() has advanced task_id past our round.
      cv.wait(lock,[&]{if (finished) return true; if (cur_task == task_id) return false; cur_task=task_id; return true;});
    } while(!finished);
  }

  mass_thread_pool() = delete;
  mass_thread_pool(F fin):f(fin),finished(false) {}
  mass_thread_pool(mass_thread_pool&&)=delete; // address is part of identity

  // Starts one round on n workers (creating missing workers on first use) and
  // returns a future that becomes ready when every worker finished the round.
  std::future<void> kick( size_t n ) {
    std::future<void> r;
    {
      std::unique_lock<std::mutex> lock(m);
      ++task_id;
      task_done.reset( new std::promise<void>() );
      finished_count = 0;
      r = task_done->get_future();
      while (threads.size() < n) {
        size_t i = threads.size();
        threads.emplace_back( &mass_thread_pool::task, this, f, i, task_id );
      }
    }
    cv.notify_all();
    return r;
  }

  // Stops and joins all workers.
  ~mass_thread_pool() {
    {
      // The flag must be published while holding the mutex: otherwise a worker
      // that has just evaluated the wait predicate but has not blocked yet
      // misses the notification below and join() never returns.
      std::lock_guard<std::mutex> lock(m);
      finished = true;
    }
    cv.notify_all();
    for (auto&& t:threads) {
      t.join();
    }
  }
};
//启动n个线程,所有线程都使用索引执行任务F:
模板
结构质量线程池{
F;
std::vectorthreads;
std::条件变量cv;
std::互斥m;
大小任务id=0;
大小\u t已完成\u计数=0;
std::唯一任务完成;
标准::原子完成;
无效任务(F、大小、大小当前任务){
//std::cout但这意味着我每次都必须在多线程乘法中重新创建这些线程,是否可以在main中创建线程并以某种方式重用它们,直到得到结果?在这种情况下,将每个线程专用于沿一条线(甚至针对给定的像素)计算结果,并让每个线程计算幂次结果。是的,但是再一次,我的线程如何知道它完成了一个乘法步骤,并等待其他线程完成相同的一个乘法步骤?这是我的主要问题。我编辑了代码,查看for(;;)join();循环。但这是power,您的列将被更改,如果不按顺序执行,您的结果将是错误的。计数器不需要原子。condition\u变量
始终与保护相关条件的mutex
配对,在这种情况下,计数器必须对条件进行任何修改当互斥锁被锁定时,否则你会在程序中遇到竞争条件。你能用代码编写吗?我不知道如何正确地使用所有这些,第一步和第二步我理解,第三步和第四步我很困惑,尤其是关于原子计数器。不要写全部内容,只要告诉我如何使用每个乘法步骤之前的原子计数器。但这是在等待其他线程被创建,而不是等待其他线程完成它们的工作。@paul1076你是什么意思?对我来说,它首先执行所有“起始线程”行,然后才执行“继续线程”行,然后才执行“第二次继续线程”。@UldisK so,wait,cv.wait(锁)仅适用于当前线程,还是它也适用于我完成后创建的线程?谢谢,但我不知道这与我的问题有什么关系。顺便说一句,线性乘法效率不高。请尝试计算a^7
=a*a^2*a^4,其中a^4=(a^2)^2。这只需要4次乘法,而不是6次。
// launches n threads all doing task F with an index:
// Launches n threads that all run the same task F, each with its own index.
//
// Usage: construct with the task, call kick(n) to start a round, wait on the
// returned future, then call kick(n) again for the next round. The worker
// threads are created once and reused; the destructor stops and joins them.
// A new round must not be kicked before the previous round's future is ready.
template<class F>
struct mass_thread_pool {
  F f;                                            // task prototype; every worker gets its own copy
  std::vector< std::thread > threads;             // workers, created lazily by kick()
  std::condition_variable cv;                     // signals "new round" and "shut down"
  std::mutex m;                                   // guards task_id, finished_count, task_done, threads
  size_t task_id = 0;                             // number of the round currently requested
  size_t finished_count = 0;                      // workers that completed the current round
  std::unique_ptr<std::promise<void>> task_done;  // fulfilled when every worker finished the round
  std::atomic<bool> finished;                     // set by the destructor to stop the workers

  // Worker loop: run f(n), report completion, then sleep until the next round
  // or until shutdown. cur_task is the round this worker has already picked up.
  void task( F f, size_t n, size_t cur_task ) {
    do {
      f(n);
      std::unique_lock<std::mutex> lock(m);
      if (finished)
        break;
      ++finished_count;
      if (finished_count == threads.size())
      {
        // Last worker of the round: release whoever waits on the future.
        task_done->set_value();
        finished_count = 0;
      }
      // Wake on shutdown, or when kick() has advanced task_id past our round.
      cv.wait(lock,[&]{if (finished) return true; if (cur_task == task_id) return false; cur_task=task_id; return true;});
    } while(!finished);
  }

  mass_thread_pool() = delete;
  mass_thread_pool(F fin):f(fin),finished(false) {}
  mass_thread_pool(mass_thread_pool&&)=delete; // address is part of identity

  // Starts one round on n workers (creating missing workers on first use) and
  // returns a future that becomes ready when every worker finished the round.
  std::future<void> kick( size_t n ) {
    std::future<void> r;
    {
      std::unique_lock<std::mutex> lock(m);
      ++task_id;
      task_done.reset( new std::promise<void>() );
      finished_count = 0;
      r = task_done->get_future();
      while (threads.size() < n) {
        size_t i = threads.size();
        threads.emplace_back( &mass_thread_pool::task, this, f, i, task_id );
      }
    }
    cv.notify_all();
    return r;
  }

  // Stops and joins all workers.
  ~mass_thread_pool() {
    {
      // The flag must be published while holding the mutex: otherwise a worker
      // that has just evaluated the wait predicate but has not blocked yet
      // misses the notification below and join() never returns.
      std::lock_guard<std::mutex> lock(m);
      finished = true;
    }
    cv.notify_all();
    for (auto&& t:threads) {
      t.join();
    }
  }
};