C++ 使用令牌仅解析csv文件中的特定列
标签:c++, file, parsing, csv, ifstream
如果我有一个用逗号分隔的值填充的文件,例如:
"myComputer",5,192.168.1.0,25
"herComputer",6,192.168.1.1,26
"hisComputer",7,192.168.1.2,27
我想把数据作为字符串取出,我会这样做:
// Starting point from the question: read myCSVFile.txt one line at a time.
std::string line;
std::ifstream myfile ("myCSVFile.txt");
// The body only runs when the stream was opened; otherwise nothing happens.
if(myfile.is_open())
{
// getline stores the next line in `line`; the loop ends once no further line can be read.
while(getline(myfile,line))
{
// Working copy of the current line and the field separator. This snippet only
// declares them; it does not split the line yet.
std::string tempString = line;
std::string delimiter = ",";
}
}
为了单独解析每个值,我使用如下方法:
有没有办法用上述方法实现这一点(即只取出特定的列,例如第一列和第三列),还是应该换用完全不同的方法?谢谢。
正如上面的评论所建议的,答案是只输出我想要的那些列。我通过添加一个计数器和一个执行实际打印的循环实现了这一点。我本可以很容易地把计数器加到 for 循环里,但我没有这样做。
std::string s = "scott>=tiger>=mushroom";
std::string delimiter = ">=";
size_t pos = 0;
std::string token;
int counter = 0;
while ((pos = s.find(delimiter)) != std::string::npos)
{
token = s.substr(0, pos);
if(counter == 0 || counter == 2)
{
std::cout << token << std::endl;
}
s.erase(0, pos + delimiter.length());
}
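std::string s = "scott>=tiger>=mushroom";
std::string delimiter = ">=";
size_t pos = 0;
std::string token;
int counter = 0;
while ((pos = s.find(delimiter)) != std::string::npos)
{
token = s.substr(0, pos);
if(counter == 0 || counter == 2)
{
std::cout << token << std::endl;
(以上重复片段在抓取时被截断。)
使用专用库来完成此任务要容易得多。有了 Boost.Tokenizer,这是一件轻而易举的事: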
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <boost/tokenizer.hpp>
/// Reads "myCSVFile.txt" line by line, splits each line into CSV fields with
/// Boost.Tokenizer and prints the first and third field separated by a tab.
///
/// A line that does not yield exactly four fields is reported on stderr as
/// "illegal line" and otherwise ignored.
///
/// @return 0 after the whole file was processed, 1 if the file cannot be opened.
int main()
{
    // escaped_list_separator is Boost's CSV-style separator (handles quoted
    // fields). The aliases are loop-invariant, so they live outside the loop.
    using Separator = boost::escaped_list_separator<char>;
    using Tokenizer = boost::tokenizer<Separator>;

    std::ifstream myfile("myCSVFile.txt");
    if (!myfile.is_open())
    {
        // Report the failure instead of silently exiting with success.
        std::cerr << "cannot open myCSVFile.txt\n";
        return 1;
    }

    std::string line;
    while (std::getline(myfile, line))
    {
        Tokenizer tokenizer(line);
        // Materialise all fields of the line so they can be indexed.
        const std::vector<std::string> tokens(tokenizer.begin(), tokenizer.end());
        if (tokens.size() == 4)
        {
            std::cout << tokens[0] << "\t" << tokens[2] << "\n";
        }
        else
        {
            std::cerr << "illegal line\n";
        }
    }
    return 0;
}
如您所见,其思路是先把一行中的所有值存入 std::vector,然后输出所需的值。
现在,如果您真的处理大型文件,这可能会导致性能问题。在这种情况下,请将计数器与标记器一起使用:
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <boost/tokenizer.hpp>
/// Reads "myCSVFile.txt" line by line and prints the first and third CSV field
/// of every line, separated by a tab, without storing the fields.
///
/// Tokenizing stops after the third field. No field-count validation is done:
/// a shorter line prints whichever of the two fields exist, and every line is
/// terminated with a newline.
///
/// @return 0 after the whole file was processed, 1 if the file cannot be opened.
int main()
{
    // Loop-invariant aliases, declared once instead of once per line.
    using Separator = boost::escaped_list_separator<char>;
    using Tokenizer = boost::tokenizer<Separator>;

    std::ifstream myfile("myCSVFile.txt");
    if (!myfile.is_open())
    {
        // Report the failure instead of silently exiting with success.
        std::cerr << "cannot open myCSVFile.txt\n";
        return 1;
    }

    std::string line;
    while (std::getline(myfile, line))
    {
        Tokenizer tokenizer(line);
        int count = 0;  // zero-based index of the current field
        for (Tokenizer::iterator iter = tokenizer.begin();
             (iter != tokenizer.end()) && (count < 3);
             ++iter, ++count)
        {
            if (count == 0)
            {
                // First wanted column, followed by the column separator.
                std::cout << *iter << "\t";
            }
            else if (count == 2)
            {
                // Second wanted column; the newline is written after the loop.
                std::cout << *iter;
            }
        }
        std::cout << "\n";
    }
    return 0;
}
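#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <boost/tokenizer.hpp>
int main()
{
std::ifstream myfile("myCSVFile.txt");
if (myfile.is_open())
{
std::string line;
while (std::getline(myfile, line))
{
typedef boost::escaped_list_separator<char> Separator;
typedef boost::tokenizer<Separator> Tokenizer;
Tokenizer tokenizer(line);
int count = 0;
for (Tokenizer::iterator iter = tokenizer.begin(); (iter != tokenizer.end()) && (count < 3); ++iter)
{
if ((count == 0) || (count == 2))
{
std::cout << *iter;
(以上重复片段在抓取时被截断。)
评论:
- 不能在 while 循环中用一个计数器来统计列数,只输出所需的列吗?还是我遗漏了什么?
- 只是……不要输出第二列就行了。
- 逗号不一定是分隔符。特别是,出现在带引号的字符串中的逗号就不是分隔符,而您的方法却假设它总是分隔符。
- 是的,它确实是这样假设的。就这个问题而言,让我们假设逗号只会用作分隔符。感谢您回答这个问题,并提供了更优雅的解决方案。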
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <boost/tokenizer.hpp>
/// Reads "myCSVFile.txt" line by line, splits each line into CSV fields with
/// Boost.Tokenizer and prints the first and third field separated by a tab.
///
/// A line that does not yield exactly four fields is reported on stderr as
/// "illegal line" and otherwise ignored.
///
/// @return 0 after the whole file was processed, 1 if the file cannot be opened.
int main()
{
    // escaped_list_separator is Boost's CSV-style separator (handles quoted
    // fields). The aliases are loop-invariant, so they live outside the loop.
    using Separator = boost::escaped_list_separator<char>;
    using Tokenizer = boost::tokenizer<Separator>;

    std::ifstream myfile("myCSVFile.txt");
    if (!myfile.is_open())
    {
        // Report the failure instead of silently exiting with success.
        std::cerr << "cannot open myCSVFile.txt\n";
        return 1;
    }

    std::string line;
    while (std::getline(myfile, line))
    {
        Tokenizer tokenizer(line);
        // Materialise all fields of the line so they can be indexed.
        const std::vector<std::string> tokens(tokenizer.begin(), tokenizer.end());
        if (tokens.size() == 4)
        {
            std::cout << tokens[0] << "\t" << tokens[2] << "\n";
        }
        else
        {
            std::cerr << "illegal line\n";
        }
    }
    return 0;
}
// Range-based form of the explicit iterator loop: append every field the
// tokenizer produces to `tokens`, in order.
for (const auto& field : tokenizer)
    tokens.push_back(field);
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include <boost/tokenizer.hpp>
/// Reads "myCSVFile.txt" line by line and prints the first and third CSV field
/// of every line, separated by a tab, without storing the fields.
///
/// Tokenizing stops after the third field. No field-count validation is done:
/// a shorter line prints whichever of the two fields exist, and every line is
/// terminated with a newline.
///
/// @return 0 after the whole file was processed, 1 if the file cannot be opened.
int main()
{
    // Loop-invariant aliases, declared once instead of once per line.
    using Separator = boost::escaped_list_separator<char>;
    using Tokenizer = boost::tokenizer<Separator>;

    std::ifstream myfile("myCSVFile.txt");
    if (!myfile.is_open())
    {
        // Report the failure instead of silently exiting with success.
        std::cerr << "cannot open myCSVFile.txt\n";
        return 1;
    }

    std::string line;
    while (std::getline(myfile, line))
    {
        Tokenizer tokenizer(line);
        int count = 0;  // zero-based index of the current field
        for (Tokenizer::iterator iter = tokenizer.begin();
             (iter != tokenizer.end()) && (count < 3);
             ++iter, ++count)
        {
            if (count == 0)
            {
                // First wanted column, followed by the column separator.
                std::cout << *iter << "\t";
            }
            else if (count == 2)
            {
                // Second wanted column; the newline is written after the loop.
                std::cout << *iter;
            }
        }
        std::cout << "\n";
    }
    return 0;
}
std::vector<std::string> tokens;
while ((pos = s.find(delimiter)) != std::string::npos) {
token = s.substr(0, pos);
tokens.push_back(token);
s.erase(0, pos + delimiter.length());
}
if (tokens.size() == 4)
{
std::cout << tokens[0] << "\t" << tokens[2] << "\n";
}
else
{
std::cerr << "illegal line\n";
}
int count = 0;
while ((pos = s.find(delimiter)) != std::string::npos && (count < 4)) {
token = s.substr(0, pos);
if ((count == 0) || (count == 2))
{
std::cout << token;
if (count == 0)
{
std::cout << "\t";
}
}
++count;
s.erase(0, pos + delimiter.length());
}