C++ 如何使用 ifstream（C++）只读取某些事先已知的行_C++_File_Input_Io

C++ 如何使用 ifstream（C++）只读取某些事先已知的行

c++ file input io

C++ 如何使用 ifstream（C++）只读取某些事先已知的行,c++,file,input,io,C++,File,Input,Io,通过对文件进行预处理，我找到了一些需要进一步处理的行，我知道我想读这些行。有没有比使用 ifstream::getline(...) 逐行读取更快的解决方案？例如，我知道我只需要行号为 4 的倍数的行（0、4、8、12、16……），或者行号存储在某个向量中的特定行。现在我的做法是： string line; int counter = 0; while( getline(ifstr,line) ){ if(counter%4 =0){ // some code working with line }

通过对文件进行预处理，我找到了一些需要进一步处理的行，我知道我想读这些行。有没有比使用

ifstream::getline(...)

逐行读取更快的解决方案

例如，我知道我只需要行号为 4 的倍数的行（0、4、8、12、16……），或者行号存储在某个向量中的特定行

现在我要做的是：

string line;
int counter = 0;  // zero-based index of the line currently held in `line`
while( getline(ifstr,line) ){
   // Only every fourth line (0, 4, 8, ...) is processed; note `==` (comparison), not `=`.
   if(counter%4 == 0){
      // some code working with line
   }
   ++counter;  // advance for every line read, whether or not it was processed
}

但是我想要这样的东西（如果更快的话）

需要再次说明的是，我手里有一些行索引（已排序，但没有固定规律）；为了简单起见，这里才使用了“4 的倍数”这个示例

编辑：我想一开始就跳转到行，例如，我知道我需要阅读第2000行，如何快速跳过1999行？

谢谢大家。 将与文件各行开头相对应的

std::fstream::streampos

实例存储到

std::vector

中，然后您可以使用该向量的索引访问特定行。可能的实施如下：

/// Random access to the lines of a text file.
///
/// The constructor scans the file once and remembers the stream position at
/// which every line starts; read_line() then seeks straight to the requested
/// line instead of re-reading everything in front of it.
class file_reader {
public:
    /// Opens `filename` for reading and indexes the start of each line.
    /// If the file cannot be opened the index stays empty and lines() is 0.
    explicit file_reader(const std::string& filename)
        : fs(filename, std::ios::in) { cache_file_streampos(); }

    /// Number of lines found in the file during construction.
    std::size_t lines() const noexcept { return line_streampos_vec.size(); }

    /// Returns the contents of line `n` (zero-based, without the trailing '\n').
    /// @throws std::out_of_range if `n >= lines()`.
    std::string read_line(std::size_t n) {
        if (n >= line_streampos_vec.size())
            throw std::out_of_range("out of bounds");
        navigate_to_line(n);
        std::string rtn_str;
        std::getline(fs, rtn_str);
        return rtn_str;
    }
private:
    std::fstream fs;
    // line_streampos_vec[i] is the position of the first character of line i.
    std::vector<std::fstream::pos_type> line_streampos_vec;
    // Capacity hint for the scratch buffer used while indexing; longer lines still work.
    static constexpr std::size_t max_line_length = 256;

    // Scan the whole file once, recording where every line begins.
    void cache_file_streampos() {
        std::string s;
        s.reserve(max_line_length);
        for (;;) {
            // Take the position BEFORE reading, so that it marks the start of
            // the line about to be consumed rather than the start of the next one.
            const std::fstream::pos_type line_start = fs.tellg();
            if (!std::getline(fs, s))
                break;
            line_streampos_vec.push_back(line_start);
        }
        // Hitting end-of-file leaves eofbit/failbit set; reset them so later seeks succeed.
        fs.clear();
    }

    // Position the stream at the first character of line `n` (caller checks bounds).
    void navigate_to_line(std::size_t n) {
        fs.clear();
        fs.seekg(line_streampos_vec[n]);
    }
};

类文件\u读取器{
公众：
//在构造期间加载文件streampos偏移
显式文件读取器（const std:：string和filename）
：fs（文件名）{cache_file_streampos（）；}
std:：size_t lines（）常量noexcept{return line_streampos_vec.size（）；}
//获取文件中特定行的std:：字符串表示形式
标准：：字符串读取线（标准：：大小）{
如果（n>=直线流位置向量大小（）-1）
抛出标准：：超出范围（“超出范围”）；
导航至_线（n）；
std：：字符串rtn_str；
std：：getline（fs，rtn_str）；
返回rtn_str；
}
私人：
std：：fstream fs；
std：：矢量线\流位置\矢量；
const std:：size\t max\u line\u length=//一些合理的值
//将文件streampos实例存储在vector中
无效缓存文件\u streampos（）{
std：：字符串s；
s、 预留（最大线路长度）；
while（std：：getline（fs，s））
直线向后推（fs.tellg（））；
}
//转到文件流中的特定行
无效导航到行（标准：：大小）{
fs.clear（）；
fs.seekg（流线位置向量[n]）；
}
};

然后，你可以通过以下方式遍历文件中的特定行：

file_reader fr("filename.ext");
// Visit every fourth line (0, 4, 8, ...). The index is a std::size_t so that it
// has the same type as the value returned by lines().
for (std::size_t i = 0; i < fr.lines(); ++i) {
    if (i % 4 == 0) {
        std::string line_contents = fr.read_line(i); // then do something with the string
    }
}

文件读取器fr（“filename.ext”）；
对于（int i=0；i

ArchbishopOfBanterbury 的回答很好，我同意他的观点：在进行预处理时，只需存储每行开头的字符位置，就可以获得更清晰的代码和更高的效率

但是，假设这是不可能的（可能预处理是由其他API处理的，或者来自用户输入），有一个解决方案应该只执行读取指定行所需的最小工作量

基本问题是，给定一个具有可变行长的文件，您无法知道每行的开始和结束位置，因为行定义为以

'\n'

结尾的字符序列。因此，您必须解析每个字符，以检查它是否为

'\n'

，如果是，请推进行计数器，并在行计数器与所需输入之一匹配时读取该行

/// Extracts selected lines from a stream in a single forward pass.
///
/// @param file_to_read          Stream to scan, starting at its current position.
/// @param line_numbers_to_read  Zero-based line numbers, sorted in ascending
///                              order. A number may be repeated; the line is
///                              then returned once per occurrence.
/// @return The requested lines (without '\n'), in request order. Requests that
///         lie beyond the end of the stream produce no entry.
auto retrieve_lines(std::ifstream& file_to_read, std::vector<int> line_numbers_to_read) -> std::vector<std::string>
{
    auto cursor = std::istreambuf_iterator<char>(file_to_read);
    const auto stream_end = std::istreambuf_iterator<char>();

    auto wanted = line_numbers_to_read.begin();
    const auto wanted_end = line_numbers_to_read.end();

    auto output_lines = std::vector<std::string>();
    output_lines.reserve(line_numbers_to_read.size());

    // Stop as soon as every request is served (even if the stream has more
    // lines), or when the stream runs out (even if requests remain).
    for (int current_line = 0; cursor != stream_end && wanted != wanted_end; ++current_line)
    {
        if (current_line == *wanted)
        {
            // Collect characters up to, but not including, the line terminator.
            // An empty line simply yields an empty string.
            auto matching_line = std::string();
            while (cursor != stream_end && *cursor != '\n')
            {
                matching_line.push_back(*cursor);
                ++cursor;
            }

            // Serve every request that names this line. Without this, a
            // repeated number would never match again and would block all
            // requests that follow it.
            do
            {
                output_lines.push_back(matching_line);
                ++wanted;
            } while (wanted != wanted_end && *wanted == current_line);
        }
        else
        {
            // Not requested: skip ahead to the terminating '\n'.
            while (cursor != stream_end && *cursor != '\n')
            {
                ++cursor;
            }
        }

        // Both branches stop on the '\n' (or at end of stream); step over it
        // so the next iteration starts on the first character of the next line.
        if (cursor != stream_end && *cursor == '\n')
        {
            ++cursor;
        }
    }

    return output_lines;
}

自动检索\u行（std:：ifstream&file\u to\u read，std:：vector line\u number\u to\u read）->std:：vector
{
自动开始=std:：istreambuf_迭代器（文件到读取）；
auto end=std:：istreambuf_迭代器（）；
自动电流_线=0；
自动下一行数=标准：：开始（行数到读取）；
自动输出线=标准：：向量（）；
output_lines.reserve（line_numbers_to_read.size（））；//这可能是一个愚蠢的“优化”，因为所有字符串仍然是独立的无保留缓冲区
//如果我们已经读到了想读的行的末尾，即使流中还有行，我们也可以跳转
//如果我们到达了流的末尾，即使假定还有行要读，我们也必须退出；输入一定是不正确的
while（begin！=end&&next_line_num！=std:：end（line_number_to_read））
{
如果（当前行==*下一行数）
{
自动匹配_line=std:：string（）；
如果（*开始！='\n'）
{
//潜在优化：将匹配线保留到您期望适合大多数/所有输入线的内容
while（begin！=end&&*begin！='\n'）
{
匹配_行。向后推_（*begin++）；
}
}
输出线。将线放回（匹配线）；
++下一行；
}
其他的
{
//通过查找下一个“\n”跳过此“行”
while（begin！=end&&*begin！='\n'）
{
++开始；
}
}
//前面if/else中的任一代码路径都让我们盯着行末尾的“\n”，
//这不是循环下一次迭代的正确状态。
//因此，跳过此“\n”进入下一行的开头
如果（begin！=end&&*begin='\n'）
{
++开始；
}
++当前_线；
}
返回输出线；
}

这里是它的在线运行演示。正如您所看到的，它能正确处理空行，也能正确处理被要求读取的行数多于文件实际行数的情况。

因为@caps说这让他觉得标准库中没有任何东西可以帮助完成这类任务，所以我不得不证明：）

/// Splits [begin, end) on the regular expression `delim` and writes the
/// pieces whose zero-based indices are listed in `lines` to `out`.
///
/// `lines` must be sorted ascending and non-negative (checked with assert).
/// Copying stops at the first requested index that lies past the last piece.
/// Returns the advanced output iterator.
template <typename It, typename Out, typename Filter = std::vector<int> >
Out retrieve_lines(It begin, It const end, Filter lines, Out out, char const* delim = "\\n") {
    if (lines.empty()) {
        return out;
    }

    // Preconditions on the requested indices.
    assert(std::is_sorted(lines.begin(), lines.end()));
    assert(lines.front() >= 0);

    // Submatch index -1 selects the text between delimiter matches.
    const std::regex separator(delim);
    std::regex_token_iterator<It> token(begin, end, separator, -1);
    const std::regex_token_iterator<It> no_more_tokens;

    // Turn absolute indices into "how many pieces to skip from the previous hit".
    std::adjacent_difference(lines.begin(), lines.end(), lines.begin());

    for (auto step = lines.begin(); step != lines.end(); ++step) {
        // Move forward by the requested distance, stopping early if input runs out.
        for (size_t remaining = *step; token != no_more_tokens && remaining != 0; --remaining) {
            ++token;
        }
        if (token == no_more_tokens) {
            break;
        }
        *out++ = *token;
    }

    return out;
}

int main() {
    // Sample text made of four newline-separated lines (indices 0 to 3).
    std::string is = " a b c d e\nf g hijklmnop\nqrstuvw\nxyz";
    std::vector<std::string> output_lines;

    // Request lines 0, 3 and 999 and collect whatever is found.
    retrieve_lines(is.begin(), is.end(), {0,3,999}, std::back_inserter(output_lines));

    // Print each collected line on its own row (debug output).
    for (auto it = output_lines.begin(); it != output_lines.end(); ++it) {
        std::cout << *it << "\n";
    }
}

#include <boost/iostreams/device/mapped_file.hpp>
int main() {
    // Open the system word list as a Boost mapped file source.
    boost::iostreams::mapped_file_source is("/etc/dictionaries-common/words");

    // Write lines 13, 784 and 9996 to standard output, each followed by "\n".
    std::ostream_iterator<std::string> sink(std::cout, "\n");
    retrieve_lines(is.begin(), is.end(), {13,784, 9996}, sink);
}

现场演示2：从文件到

cout

/// Splits [begin, end) on the regular expression `delim` and writes the
/// pieces whose zero-based indices are listed in `lines` to `out`.
///
/// `lines` must be sorted ascending and non-negative (checked with assert).
/// Copying stops at the first requested index that lies past the last piece.
/// Returns the advanced output iterator.
template <typename It, typename Out, typename Filter = std::vector<int> >
Out retrieve_lines(It begin, It const end, Filter lines, Out out, char const* delim = "\\n") {
    if (lines.empty()) {
        return out;
    }

    // Preconditions on the requested indices.
    assert(std::is_sorted(lines.begin(), lines.end()));
    assert(lines.front() >= 0);

    // Submatch index -1 selects the text between delimiter matches.
    const std::regex separator(delim);
    std::regex_token_iterator<It> token(begin, end, separator, -1);
    const std::regex_token_iterator<It> no_more_tokens;

    // Turn absolute indices into "how many pieces to skip from the previous hit".
    std::adjacent_difference(lines.begin(), lines.end(), lines.begin());

    for (auto step = lines.begin(); step != lines.end(); ++step) {
        // Move forward by the requested distance, stopping early if input runs out.
        for (size_t remaining = *step; token != no_more_tokens && remaining != 0; --remaining) {
            ++token;
        }
        if (token == no_more_tokens) {
            break;
        }
        *out++ = *token;
    }

    return out;
}

int main() {
    // Sample text made of four newline-separated lines (indices 0 to 3).
    std::string is = " a b c d e\nf g hijklmnop\nqrstuvw\nxyz";
    std::vector<std::string> output_lines;

    // Request lines 0, 3 and 999 and collect whatever is found.
    retrieve_lines(is.begin(), is.end(), {0,3,999}, std::back_inserter(output_lines));

    // Print each collected line on its own row (debug output).
    for (auto it = output_lines.begin(); it != output_lines.end(); ++it) {
        std::cout << *it << "\n";
    }
}

#include <boost/iostreams/device/mapped_file.hpp>
int main() {
    // Open the system word list as a Boost mapped file source.
    boost::iostreams::mapped_file_source is("/etc/dictionaries-common/words");

    // Write lines 13, 784 and 9996 to standard output, each followed by "\n".
    std::ostream_iterator<std::string> sink(std::cout, "\n");
    retrieve_lines(is.begin(), is.end(), {13,784, 9996}, sink);
}

boost::iostreams::mapped_file_source

的使用可以很容易地替换为right up，但我发现在演示示例中它更难看

您是否可以存储文件偏移量（通过

tellg()

）而不是行号？然后您可以稍后使用

seekg（）跳转到一个