C++ 读取大量文件时发生SEGFULT

C++ 读取大量文件时发生SEGFULT,c++,segmentation-fault,C++,Segmentation Fault,我有这个方法:(它应该从给定目录中的所有文件创建一个倒排列表) 类索引{ 公众: 索引(); void create(); void writeInvertedIndex(); 私人: bool是空格(const char ch); std::map inversedindex; }; 无效索引::创建(){ std::string datasetPath=“/home/skluzada/Downloads/BI-VWM/Project/dataset/”; std::字符串文件路径、单词、文本

我有这个方法:(它应该从给定目录中的所有文件创建一个倒排列表)

类索引{
公众:
索引();
void create();
void writeInvertedIndex();
私人:
bool是空格(const char ch);
std::map inversedindex;
};
无效索引::创建(){
std::string datasetPath=“/home/skluzada/Downloads/BI-VWM/Project/dataset/”;
std::字符串文件路径、单词、文本;
std::ifstream-infle;
int fileIndex=0;
std::尺寸\u t文本长度,i;
DIR*DIR;
结构导向;
if((dir=opendir(datasetPath.c_str())!=NULL){
while((ent=readdir(dir))!=NULL){
filePath=datasetPath+ent->d_name;

std::cout所问问题的解决方案是rafix07的评论:


为什么不在
while(isWhiteSpace(text[i]){i++;}
中检查
i
,在下一个while循环中,您可能读取了超出范围的数据。更改为
while(i
并在第二个循环中执行同样的操作


崩溃似乎发生在
Index::writeInvertedIndex()
中,因此显示该函数可能是一件好事。请尝试创建一个函数来向我们显示(强调最小和完整位)。为什么不在
中检查
i
,而(isWhiteSpace(text[i]){i++;}
在下一个while循环中,您可能读取了超出范围的数据。更改为
while(i
并在第二个循环中执行同样的操作。@rafix07这就是问题所在,非常感谢。这个答案到底应该做什么?@FeiXiang修复了由于数组访问超出范围而导致的segfault问题。您的注释有什么意义?如果缺少某些内容,您可以编辑答案,而不仅仅是不清楚的注释。您基本上可以这样做st复制了问题下方已发布的评论。这根本不会添加任何新信息。您至少应该将其设置为社区wiki,因为它不包含您自己的工作。
class Index{
public:
  Index();
  void create();
  void writeInvertedIndex();
private:
  bool isWhiteSpace(const char ch);
  std::map<std::string, std::set<int>> invertedIndex;
};

 void Index::create(){
  std::string datasetPath = "/home/skluzada/Downloads/BI-VWM/Project/dataset/";
  std::string filePath, word, text;
  std::ifstream infile;
  int fileIndex = 0;
  std::size_t textLen, i;
  DIR * dir;
  struct dirent * ent;
  if ((dir = opendir (datasetPath.c_str())) != NULL){
    while((ent = readdir(dir)) != NULL){
      filePath = datasetPath + ent->d_name;
      std::cout << filePath << std::endl;
      std::ifstream inFile(filePath, std::ios::in);
      std::stringstream buffer;
      buffer << inFile.rdbuf();
      std::string text = buffer.str();
      inFile.close();
      textLen = text.size();
      i = 19;
      while (i < textLen){
        word = "";
        while(isWhiteSpace(text[i])){
          i++;
        }
        while(!isWhiteSpace(text[i])){
          word = word + text[i];
          i++;
        }
        invertedIndex[word].insert(fileIndex);
      }
      fileIndex++;
    }
  }
}
==9952== Syscall param writev(vector[...]) points to uninitialised byte(s)
==9952==    at 0x57F6610: __writev_nocancel (in /usr/lib64/libc-2.25.so)
==9952==    by 0x4EEC4B1: std::__basic_file<char>::xsputn_2(char const*, long, char const*, long) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x4F29BC1: std::basic_filebuf<char, std::char_traits<char> >::xsputn(char const*, long) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x4F4E063: std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x401BFC: Index::writeInvertedIndex() (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x4021D0: main (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==  Address 0x6f929f4 is 84 bytes inside a block of size 2,273 alloc'd
==9952==    at 0x4C2E1CA: operator new(unsigned long) (vg_replace_malloc.c:334)
==9952==    by 0x4F62144: void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_construct<char*>(char*, char*, std::forward_iterator_tag) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x4F6219E: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x404A5F: std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, 0ul>(std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>&, std::tuple<>&, std::_Index_tuple<0ul>, std::_Index_tuple<>) (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x404782: std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(std::piecewise_construct_t, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, std::tuple<>) (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x40458C: void __gnu_cxx::new_allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > > >::construct<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >, std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, std::tuple<> >(std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >*, std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>&&, std::tuple<>&&) (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x404247: void std::allocator_traits<std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > > > >::construct<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >, std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, std::tuple<> >(std::allocator<std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > > >&, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >*, std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>&&, std::tuple<>&&) (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x403C6C: void std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > > >::_M_construct_node<std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, std::tuple<> >(std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > >*, std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>&&, std::tuple<>&&) (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x403069: std::_Rb_tree_node<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > >* std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > > >::_M_create_node<std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, std::tuple<> >(std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>&&, std::tuple<>&&) (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x402C4D: std::_Rb_tree_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > > std::_Rb_tree<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > >, std::_Select1st<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > > >::_M_emplace_hint_unique<std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, std::tuple<> >(std::_Rb_tree_const_iterator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > >, std::piecewise_construct_t const&, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>&&, std::tuple<>&&) (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x4028A4: std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::set<int, std::less<int>, std::allocator<int> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::set<int, std::less<int>, std::allocator<int> > > > >::operator[](std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x40203A: Index::create() (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952== 
==9952== Syscall param writev(vector[...]) points to uninitialised byte(s)
==9952==    at 0x57F6610: __writev_nocancel (in /usr/lib64/libc-2.25.so)
==9952==    by 0x4EEC4B1: std::__basic_file<char>::xsputn_2(char const*, long, char const*, long) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x4F29BC1: std::basic_filebuf<char, std::char_traits<char> >::xsputn(char const*, long) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x4F42581: std::ostreambuf_iterator<char, std::char_traits<char> > std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::_M_insert_int<long>(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x4F4E564: std::ostream& std::ostream::_M_insert<long>(long) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x401C82: Index::writeInvertedIndex() (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x4021D0: main (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==  Address 0x5d0f861 is 721 bytes inside a block of size 8,192 alloc'd
==9952==    at 0x4C2E8B7: operator new[](unsigned long) (vg_replace_malloc.c:423)
==9952==    by 0x4F2AA87: std::basic_filebuf<char, std::char_traits<char> >::_M_allocate_internal_buffer() (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x4F2EC71: std::basic_filebuf<char, std::char_traits<char> >::open(char const*, std::_Ios_Openmode) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x4F2ED92: std::basic_ofstream<char, std::char_traits<char> >::open(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::_Ios_Openmode) (in /usr/lib64/libstdc++.so.6.0.24)
==9952==    by 0x401B75: Index::writeInvertedIndex() (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952==    by 0x4021D0: main (in /home/skluzada/Downloads/BI-VWM/Project/index)
==9952== 
==9952== 
==9952== HEAP SUMMARY:
==9952==     in use at exit: 32,816 bytes in 1 blocks
==9952==   total heap usage: 820,861 allocs, 820,860 frees, 16,188,505,659 bytes allocated
==9952== 
==9952== LEAK SUMMARY:
==9952==    definitely lost: 32,816 bytes in 1 blocks
==9952==    indirectly lost: 0 bytes in 0 blocks
==9952==      possibly lost: 0 bytes in 0 blocks
==9952==    still reachable: 0 bytes in 0 blocks
==9952==         suppressed: 0 bytes in 0 blocks
==9952== Rerun with --leak-check=full to see details of leaked memory
==9952== 
==9952== For counts of detected and suppressed errors, rerun with: -v
==9952== Use --track-origins=yes to see where uninitialised values come from
==9952== ERROR SUMMARY: 681764 errors from 10 contexts (suppressed: 0 from 0)