C++ 线程-逐行从文件中读取_C++_Multithreading_File Io_Line By Line

C++ 线程-逐行从文件中读取

c++ multithreading file-io

C++ 线程-逐行从文件中读取,c++,multithreading,file-io,line-by-line,C++,Multithreading,File Io,Line By Line,我想从输出文件中逐行并行读取。每个线程读取一行，然后处理数据。同时，下一个线程必须读取下一行 std::ifstream infile("test.txt"); std::mutex mtx; void read(int id_thread){ while(infile.good()){ mtx.lock(); std::string sLine; getline(infile, sLine); std::cout << "Read b

我想从输出文件中逐行并行读取。每个线程读取一行，然后处理数据。同时，下一个线程必须读取下一行

// Input stream over "test.txt", shared by every worker thread running read().
std::ifstream infile("test.txt");
// Mutex that read() locks around its getline/std::cout section.
std::mutex mtx;

// Worker body: repeatedly takes the shared mutex, reads one line from the
// shared `infile`, and prints it tagged with the calling thread's id.
//
// id_thread: identifier printed in front of every line this thread reads.
//
// Returns when a read from `infile` fails (end of file or stream error).
void read(int id_thread){
   for(;;){
     // RAII guard: the mutex is released at the end of every iteration and on
     // every exit path, including an exception thrown by the stream.
     std::lock_guard<std::mutex> guard(mtx);
     std::string sLine;
     // Test the result of the read itself, while holding the lock. Checking
     // infile.good() before locking lets another thread consume the last line
     // in between, after which a failed read would be printed as an empty line.
     if(!getline(infile, sLine)){
       return;
     }
     // Output stays inside the critical section so the two insertions of one
     // record are not interleaved with another thread's output.
     std::cout << "Read by thread: " << id_thread;
     std::cout << sLine << std::endl;
   }
}

// Program entry point: starts the worker threads, waits for all of them to
// finish, and returns 0.
// main must be declared as returning int; the previous `void main` combined
// with `return 0;` is ill-formed.
int main(){
  std::vector<std::thread> threads;
  // NOTE(review): `num` (the number of workers) is not defined in this snippet
  // and has to be provided by the surrounding code.
  for(int i = 0; i < num; i++){
     // NOTE(review): the worker defined in this snippet is named `read`;
     // `parallelFun` presumably refers to it - confirm before building.
     threads.emplace_back(parallelFun, i);
  }

  // Join every worker so no std::thread is destroyed while still joinable.
  for(auto& thread : threads){
      thread.join();
  }
  return 0;
}

std::ifstream infile("test.txt");
std::mutex mtx;
void read(int id_thread){
while(infile.good()){
mtx.lock();
std::string sLine;
getline(infile, sLine);
std::cout
如果您希望每个线程只读取一行（从您的描述来看显然如此），请删除 while 循环，并且需要确保线程数与文件中的行数相同。
要摆脱上述约束，您可以使用 Boost 的线程池。
我会将循环更改为
while(infile.good()){
     mtx.lock();
     std::string sLine;
     getline(infile, sLine);
     mtx.unlock();
     std::cout << "Read by thread: " << id_thread;
     std::cout << sLine << std::endl;
   }

while(infile.good()){
mtx.lock();
std::string sLine;
getline(infile, sLine);
mtx.unlock();
std::cout
如果您希望 5 个线程各自每隔 5 行读取一行，则必须对读取进行同步，因此每个线程都必须知道前一个线程已经读完了它的那一部分。这一要求可能会带来严重的效率损失，因为某些线程可能要等待前一个线程很长时间才能运行。
以下是概念代码，未经测试，使用风险自负。
让我们首先创建一个默认类来处理原子锁。我们对齐它以避免错误共享和相关的缓存乒乓
constexpr std::size_t CACHELINESIZE = 64; // could differ on your architecture

/**
 * Atomic "turn" value used to hand control from one thread to the next.
 *
 * The object is aligned to CACHELINESIZE; the intent is to keep it on its own
 * cache line so that spinning on it does not cause false sharing.
 *
 * @tparam dType value type stored in the atomic (an integer type in practice).
 */
template<class dType>
class alignas(CACHELINESIZE) lockstep {
  std::atomic<dType> lock;

public:
  /**
   * Creates the object holding `initial`.
   * An explicit constructor is required: the class has a private member and
   * is therefore not an aggregate, so `lockstep<int> x { 0 };` needs it.
   */
  explicit lockstep(dType initial = dType(0)) noexcept : lock(initial) {}

  /**
   * Spins until the stored value equals `prev`, then atomically replaces it
   * with `next`. If the value moves away from `prev` before the exchange
   * succeeds, waiting restarts.
   *
   * @param prev value to wait for.
   * @param next value stored once `prev` has been observed.
   * @return the value that was replaced (always equal to `prev`).
   */
  dType Spinlock(dType prev = dType(0), dType next = dType(1)) {
     dType expected = prev;
     while (!lock.compare_exchange_weak(expected, next)) {
       // A failed exchange overwrites `expected` with the current value;
       // reset it because we keep waiting for `prev`.
       expected = prev;
       // Wait with plain relaxed loads and only retry the (more expensive)
       // read-modify-write once the value looks right. yield() is the
       // portable replacement for the undeclared pause() helper.
       while (lock.load(std::memory_order_relaxed) != prev) {
         std::this_thread::yield();
       }
     }
     return expected;
  }

  /** Unconditionally stores `value`, releasing whichever thread waits for it. */
  void store(dType value) {
    lock.store(value);
  }
};

// Shared turn counter, starting at 0: read() waits until it equals the line
// number the calling thread is responsible for and then stores that number + 1.
lockstep<int> lock { 0 };

// Number of worker threads started by main(); read() also uses it as the
// stride between the line numbers handled by one thread.
constexpr int NoThreads = 5;

// Input stream over "test.txt", shared by all worker threads.
std::ifstream infile("test.txt");

// Worker body for lock-step reading: thread `id_thread` reads the lines whose
// zero-based number is id_thread, id_thread + NoThreads, id_thread + 2*NoThreads, ...
// Reads are serialized through the shared `lock` turn counter; the work done
// on a line after it has been read runs concurrently with other threads.
//
// id_thread: zero-based index of this worker, in [0, NoThreads).
//
// Returns when a read from `infile` fails (end of file or stream error). The
// turn is passed on first, so every other worker reaches the same failed read
// and terminates too.
void read(int id_thread) {
   // Turn (zero-based line number) this thread waits for next.
   int next = id_thread;

   for (;;) {
     // Wait until the shared counter reaches our number. The value is left
     // unchanged here; the store below is what hands the turn on.
     lock.Spinlock(next, next);

     // We own the stream until the store below. Test the read itself rather
     // than infile.good() so a failed final read is not processed as a line.
     std::string sLine;
     const bool gotLine = static_cast<bool>(getline(infile, sLine));

     // Release the next thread, both on success and on failure.
     lock.store(next + 1);
     if (!gotLine) {
       return;
     }

     // do work asynchronous
     // ...

     // Debug log: hopefully the whole line gets written in one go, but it can
     // appear in "random" order relative to the lines of other threads.
     std::cout << "Read by thread: " << id_thread << " line no. " << next
               << " text:" << sLine << std::endl;
     next += NoThreads;  // our next expected line to process
   }
}

// Program entry point: starts NoThreads worker threads, waits for all of them
// to finish, and returns 0.
// main must be declared as returning int; the previous `void main` combined
// with `return 0;` is ill-formed.
int main() {
  std::vector<std::thread> threads;
  threads.reserve(NoThreads);
  for(int i = 0; i < NoThreads; i++) {
     // NOTE(review): the worker defined in this snippet is named `read`;
     // `parallelFun` presumably refers to it - confirm before building.
     threads.emplace_back(parallelFun, i);
  }

  // Join every worker so no std::thread is destroyed while still joinable.
  for(auto& thread : threads){
      thread.join();
  }
  return 0;
}

constexpr size_t CACHELINESIZE = 64; // 可能因您的体系结构而异
template<class dType>
class alignas(CACHELINESIZE) lockstep {
std::atomic<dType> lock = dType(0);
public:
// Spinlock 一直自旋，直到当前值等于 prev，然后尝试把 lock 设置为 next
// 在成功之前，如果值偏离了 prev，则重新开始自旋。
dType Spinlock(dType prev = dType(0), dType next = dType(1)) {
dType expected = prev;
while (!lock.compare_exchange_weak(expected, next)) { // 请求独占访问，约 100 个周期？
expected = prev;  // 我们希望继续等待 expected
do {
pause(); // 在 Intel 上大约等待一个 L2 延迟时间。
} while(lock.load(std::memory_order_relaxed) != prev);  // 每次变化只产生一次缓存未命中
}
return expected;
}
void store(dType value) {
lock.store(value);
}
};
lockstep<int> lock { 0 };
constexpr int NoThreads = 5;
std::ifstream infile("test.txt");
void read(int id_thread) {
locks[id_thread].lock = id_thread;
bool izNoGood = false;
int next = id_thread;
while(!izNoGood){
// 为下一次迭代获取锁
lock.spinlock(next, next); // 等待轮到我们的编号
// 已将文件检查移入加锁区域
izNoGood = !infile.good();
if (izNoGood) {
lock.store(next+1); // 释放下一个线程以结束运行。
return;
}
std::string sLine;
getline(infile, sLine);
// 释放下一个线程
lock.store(next+1);
// 异步执行工作
// ...
// 调试日志，希望整行能够一次性（原子地）写出
// 但相对于其他行，顺序可能是“随机”的。
std::cout
有什么问题吗？无论如何，读取都是由互斥量串行化的。线程就是这样工作的：一个线程运行一段时间，然后让另一个线程运行一段时间，以此类推。如果一个线程运行的时间足够长，能够读完短文件中的所有行，那么您必须另想办法让它一次只读取一行（比如用条件变量发信号？）。如果希望读取与处理并行进行，则应在读取该行后立即解锁互斥量，而不是等到处理完所读取的数据之后。
如果线程在检查文件状态良好之后立刻被调度出去，直到文件被其他线程读完后才恢复运行，则可能会出现问题。这不会造成死锁，但在大文件和多线程的情况下可能产生 IO 错误。修复方法是把 infile.good() 移入临界区，并用其结果更新一个 bool 变量，再由 while 循环使用该变量。
您要求的基本上是对线程的滥用。说“我想要：第一个线程读取第一行，第二个线程读取第二行，……，第 n 个线程读取第 n 行”，相当于说“我想要串行执行”。如果您想要串行执行，请不要使用多个线程。
在我看来，这会让代码变得更糟糕。原始的输入循环本身就有缺陷，而您让它保持了同样的缺陷。通过将输出移到互斥区域之外，您允许多个线程并发写入，因此输出可能不再一致。这也不是异常安全的：请使用 std::lock_guard 或其他 RAII 互斥量管理类之一，以确保在下一次循环迭代或函数退出之前总是释放锁。
@JerryCoffin 我的假设是，这只是一段最小化的示例代码，并发线程在实际代码中应该完成一些非常繁重的工作，而这些工作不一定与同步 I/O 相关。我同意，如果问题是“如何在线程之间同步 I/O”，那么应该提到这一点。