C++ 如何加速包含图(graph)数据的文本文件的 I/O 与解析
我的任务是实现A*算法,但是我从文件中读取图形节点数据的速度非常慢,大约需要4分钟,我想知道是否有办法显著加快速度 kart对象只是一个节点向量,这个问题的焦点是ioC++ 如何加速包含图形数据的文本文件的io/解析,c++,parsing,io,ifstream,C++,Parsing,Io,Ifstream,我的任务是实现A*算法,但是我从文件中读取图形节点数据的速度非常慢,大约需要4分钟,我想知道是否有办法显著加快速度 kart对象只是一个节点向量,这个问题的焦点是io ifstream ifs_edge("<my path>", ios::binary | ios::ate); auto edges_size = ifs_edge.tellg(); ifs_edge.seekg(ios::beg); string str_edges(edges_size, 0
ifstream ifs_edge("<my path>", ios::binary | ios::ate);
auto edges_size = ifs_edge.tellg();
ifs_edge.seekg(ios::beg);
string str_edges(edges_size, 0);
ifs_edge.read(&str_edges[0], edges_size);
cout << edges_size << endl;
int counter = 0;
double lon = 0, lat = 0;
string substr;
for (char c : str_edges)
{
if (c != 0x20 && c != 0x0A)
substr += c;
else
{
if (substr.size())
{
if (counter == 1)
lon = stod(substr);
if (counter == 2)
lat = stod(substr);
substr.clear();
counter++;
}
}
if (c == 0x0A)
{
counter = 0;
if (lon && lat)
{
astar::kart_node kn;
kn.c = { lon, lat };
kart.push_back(kn);
}
}
}
编辑2:
已经有人给出了更快的解决方案,这很好,但出于某些奇怪的原因,它们在我这里仍然非常慢,我也在几台计算机上试过。谜团依然存在,我愿意接受更多建议;虽然我的方案已有所改进,但这个问题还没有被标记为已解决。 您在代码中执行了大量字符串操作和逐个转换。这些都可以避免:经度和纬度可以直接读入浮点变量。在这种情况下,整个过程看起来可以简化为:
ifstream ifs_edge("<my path>");
int eater; // used to eat the first line and the index of each lon,lat pair
double lon, lat;
std::size_t size;
ifs_edge >> size; //get size for vector
std::vector<astar::kart_node> kart;
kart.reserve(size);
while(ifs_edge >> eater >> lon >> lat)
kart.emplace_back(lon, lat);
现在我们为向量预留了空间,以避免重复的内存分配和拷贝;不再有字符串转换;每个 kart_node 都直接在向量中构造,而不是先构造再复制。您在代码中进行了大量的字符串操作和逐个转换。这些都可以避免:经度和纬度可以直接读入浮点变量。在这种情况下,整个过程看起来可以简化为:
ifstream ifs_edge("<my path>");
int eater; // used to eat the first line and the index of each lon,lat pair
double lon, lat;
std::size_t size;
ifs_edge >> size; //get size for vector
std::vector<astar::kart_node> kart;
kart.reserve(size);
while(ifs_edge >> eater >> lon >> lat)
kart.emplace_back(lon, lat);
现在我们为向量预留了空间,以避免重复的内存分配和拷贝;不再有字符串转换;每个 kart_node 都直接在向量中构造,而不是先构造再复制。由于数据的格式是标准化的,所以可以使用 fstream 或 fscanf。以下是输出:fstream 大约需要 7.2 秒,fscanf 大约需要 1.6 秒。
Reading from data.txt
1000000 entry read!
2000000 entry read!
3000000 entry read!
last entry: -116.239,89.5744
read_file_cpp 7.19992s
1000000 entry read!
2000000 entry read!
3000000 entry read!
last entry: -116.239,89.5744
read_file_c 1.55708s
下面是用 clang++ -O3 编译的代码:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <cmath>
#include <ctime>
#include <cstdio>
using namespace std;
// Reads a node file with iostream extraction and appends every (lon, lat)
// pair to `*kart`.
//
// Expected layout: a leading integer record count, followed by that many
// records of "<index> <lon> <lat>" separated by arbitrary whitespace.
//
// path: text file to read.
// kart: output vector; parsed pairs are appended, existing elements are kept.
//
// If the file cannot be opened, the count is missing, or a record fails to
// parse, reading stops and only the records parsed so far are appended. The
// unchecked version would keep pushing stale or uninitialised values.
void read_file_cpp(const std::string& path, std::vector<std::pair<double, double> >* kart) {
    std::ifstream in(path);
    int n = 0;
    if (!(in >> n))
        return;  // unopened / empty file, or no leading count

    int index;
    double lon, lat;
    for (int i = 1; i <= n; ++i) {
        if (!(in >> index >> lon >> lat))
            break;  // truncated or malformed record
        kart->push_back(std::make_pair(lon, lat));
        // Progress marker once per million records.
        if (i % 1000000 == 0)
            std::cout << i << " entry read!" << std::endl;
    }
    // `in` is closed by its destructor.
}
// Reads a node file with C stdio (fscanf) and appends every (lon, lat) pair
// to `*kart`.
//
// Expected layout: a leading integer record count, followed by that many
// records of "<index> <lon> <lat>" separated by arbitrary whitespace.
//
// path: text file to read.
// kart: output vector; parsed pairs are appended, existing elements are kept.
//
// If the file cannot be opened nothing is appended. If the count is missing
// or a record does not yield all three fields, reading stops and only the
// records parsed so far are appended.
void read_file_c(const std::string& path, std::vector<std::pair<double, double> >* kart) {
    FILE* f = fopen(path.c_str(), "r");
    if (f == nullptr)
        return;  // fscanf on a null stream would be undefined behaviour

    int n = 0;
    if (fscanf(f, "%d", &n) == 1) {
        int index;
        double lon, lat;
        for (int i = 1; i <= n; ++i) {
            // fscanf returns the number of fields converted; anything but 3
            // means a truncated or malformed record.
            if (fscanf(f, "%d %lf %lf", &index, &lon, &lat) != 3)
                break;
            kart->push_back(std::make_pair(lon, lat));
            // Progress marker once per million records.
            if (i % 1000000 == 0)
                std::cout << i << " entry read!" << std::endl;
        }
    }
    fclose(f);
}
// Writes a random test file to `path`: a first line holding `n`, then `n`
// lines of "<i> <lon> <lat>" with i running from 0 to n-1.
//
// path: file to create / overwrite.
// n:    number of records to generate.
//
// Both coordinates are drawn from rand() and scaled to [-180, 180].
// NOTE(review): lat uses the same range as lon - confirm whether a narrower
// range was intended.
void generate_data(const std::string& path, int n) {
    std::cout << "generating data... " << std::endl;
    std::ofstream out(path);
    // '\n' instead of endl: same bytes in the file, but no flush after every
    // single record, which matters when writing millions of lines.
    out << n << '\n';
    for (int i = 0; i < n; ++i) {
        const float lon = static_cast<float>(rand()) / static_cast<float>(RAND_MAX) * 360 - 180;
        const float lat = static_cast<float>(rand()) / static_cast<float>(RAND_MAX) * 360 - 180;
        out << i << " " << lon << " " << lat << '\n';
    }
    // Close (and thereby flush) before announcing completion.
    out.close();
    std::cout << "done !" << std::endl;
}
// Runs one reader over `path` and reports the last parsed entry plus the time
// taken.
//
// label:  text printed in front of the elapsed time (include the trailing space).
// reader: function that appends the parsed pairs to the vector it is given.
// path:   file handed to `reader`.
// data:   scratch vector; cleared before the run, holds the result afterwards.
//
// Timing uses clock(), i.e. processor time consumed by the program, not
// wall-clock time.
static void time_reader(const char* label,
                        void (*reader)(const std::string&, std::vector<std::pair<double, double> >*),
                        const std::string& path,
                        std::vector<std::pair<double, double> >* data) {
    data->clear();
    const clock_t begin = clock();
    reader(path, data);
    const clock_t end = clock();
    const double elapsed_secs = static_cast<double>(end - begin) / CLOCKS_PER_SEC;
    // back() on an empty vector is undefined behaviour, so report a missing
    // or empty input explicitly instead.
    if (data->empty())
        std::cerr << label << "read no entries from " << path << std::endl;
    else
        std::cout << "last entry: " << data->back().first << "," << data->back().second << std::endl;
    std::cout << label << elapsed_secs << "s" << std::endl;
}

// Benchmarks the iostream reader against the stdio reader on "data.txt".
int main() {
    const std::string path = "data.txt";
    std::vector<std::pair<double, double> > data;
    // generate_data(path, 3901630);
    time_reader("read_file_cpp ", read_file_cpp, path, &data);
    time_reader("read_file_c ", read_file_c, path, &data);
}
因为您的数据格式是标准化的,所以可以使用 fstream 或 fscanf。以下是输出:fstream 大约需要 7.2 秒,fscanf 大约需要 1.6 秒。
Reading from data.txt
1000000 entry read!
2000000 entry read!
3000000 entry read!
last entry: -116.239,89.5744
read_file_cpp 7.19992s
1000000 entry read!
2000000 entry read!
3000000 entry read!
last entry: -116.239,89.5744
read_file_c 1.55708s
下面是用 clang++ -O3 编译的代码:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <cmath>
#include <ctime>
#include <cstdio>
using namespace std;
// Reads a node file with iostream extraction and appends every (lon, lat)
// pair to `*kart`.
//
// Expected layout: a leading integer record count, followed by that many
// records of "<index> <lon> <lat>" separated by arbitrary whitespace.
//
// path: text file to read.
// kart: output vector; parsed pairs are appended, existing elements are kept.
//
// If the file cannot be opened, the count is missing, or a record fails to
// parse, reading stops and only the records parsed so far are appended. The
// unchecked version would keep pushing stale or uninitialised values.
void read_file_cpp(const std::string& path, std::vector<std::pair<double, double> >* kart) {
    std::ifstream in(path);
    int n = 0;
    if (!(in >> n))
        return;  // unopened / empty file, or no leading count

    int index;
    double lon, lat;
    for (int i = 1; i <= n; ++i) {
        if (!(in >> index >> lon >> lat))
            break;  // truncated or malformed record
        kart->push_back(std::make_pair(lon, lat));
        // Progress marker once per million records.
        if (i % 1000000 == 0)
            std::cout << i << " entry read!" << std::endl;
    }
    // `in` is closed by its destructor.
}
// Reads a node file with C stdio (fscanf) and appends every (lon, lat) pair
// to `*kart`.
//
// Expected layout: a leading integer record count, followed by that many
// records of "<index> <lon> <lat>" separated by arbitrary whitespace.
//
// path: text file to read.
// kart: output vector; parsed pairs are appended, existing elements are kept.
//
// If the file cannot be opened nothing is appended. If the count is missing
// or a record does not yield all three fields, reading stops and only the
// records parsed so far are appended.
void read_file_c(const std::string& path, std::vector<std::pair<double, double> >* kart) {
    FILE* f = fopen(path.c_str(), "r");
    if (f == nullptr)
        return;  // fscanf on a null stream would be undefined behaviour

    int n = 0;
    if (fscanf(f, "%d", &n) == 1) {
        int index;
        double lon, lat;
        for (int i = 1; i <= n; ++i) {
            // fscanf returns the number of fields converted; anything but 3
            // means a truncated or malformed record.
            if (fscanf(f, "%d %lf %lf", &index, &lon, &lat) != 3)
                break;
            kart->push_back(std::make_pair(lon, lat));
            // Progress marker once per million records.
            if (i % 1000000 == 0)
                std::cout << i << " entry read!" << std::endl;
        }
    }
    fclose(f);
}
// Writes a random test file to `path`: a first line holding `n`, then `n`
// lines of "<i> <lon> <lat>" with i running from 0 to n-1.
//
// path: file to create / overwrite.
// n:    number of records to generate.
//
// Both coordinates are drawn from rand() and scaled to [-180, 180].
// NOTE(review): lat uses the same range as lon - confirm whether a narrower
// range was intended.
void generate_data(const std::string& path, int n) {
    std::cout << "generating data... " << std::endl;
    std::ofstream out(path);
    // '\n' instead of endl: same bytes in the file, but no flush after every
    // single record, which matters when writing millions of lines.
    out << n << '\n';
    for (int i = 0; i < n; ++i) {
        const float lon = static_cast<float>(rand()) / static_cast<float>(RAND_MAX) * 360 - 180;
        const float lat = static_cast<float>(rand()) / static_cast<float>(RAND_MAX) * 360 - 180;
        out << i << " " << lon << " " << lat << '\n';
    }
    // Close (and thereby flush) before announcing completion.
    out.close();
    std::cout << "done !" << std::endl;
}
// Runs one reader over `path` and reports the last parsed entry plus the time
// taken.
//
// label:  text printed in front of the elapsed time (include the trailing space).
// reader: function that appends the parsed pairs to the vector it is given.
// path:   file handed to `reader`.
// data:   scratch vector; cleared before the run, holds the result afterwards.
//
// Timing uses clock(), i.e. processor time consumed by the program, not
// wall-clock time.
static void time_reader(const char* label,
                        void (*reader)(const std::string&, std::vector<std::pair<double, double> >*),
                        const std::string& path,
                        std::vector<std::pair<double, double> >* data) {
    data->clear();
    const clock_t begin = clock();
    reader(path, data);
    const clock_t end = clock();
    const double elapsed_secs = static_cast<double>(end - begin) / CLOCKS_PER_SEC;
    // back() on an empty vector is undefined behaviour, so report a missing
    // or empty input explicitly instead.
    if (data->empty())
        std::cerr << label << "read no entries from " << path << std::endl;
    else
        std::cout << "last entry: " << data->back().first << "," << data->back().second << std::endl;
    std::cout << label << elapsed_secs << "s" << std::endl;
}

// Benchmarks the iostream reader against the stdio reader on "data.txt".
int main() {
    const std::string path = "data.txt";
    std::vector<std::pair<double, double> > data;
    // generate_data(path, 3901630);
    time_reader("read_file_cpp ", read_file_cpp, path, &data);
    time_reader("read_file_c ", read_file_c, path, &data);
}
你能展示一下文件的内容格式吗?当然,我现在已经添加了。你能展示一下文件的内容格式吗?当然,我现在已经添加了。提示:如果只生成一行数据,这将破坏整个读取过程!您应该让它更健壮,以防止攻击等等!我遇到了一个异常 std::length_error,我还注意到该文件中双精度数与索引之间实际上有一些过大的间距,这有关系吗?@Jontahan 间距应该没有关系,因为 >> 会忽略空白。@ThomasRoskop 输入验证留给提问者(OP)自己完成。为了通用性和简洁性,这里省略了它。@NathanOliver:没关系!提示:如果只生成一行数据,这将破坏整个读取过程!您应该让它更健壮,以防止攻击等等!我遇到了一个异常 std::length_error,我还注意到该文件中双精度数与索引之间实际上有一些过大的间距,这有关系吗?@Jontahan 间距应该没有关系,因为 >> 会忽略空白。@ThomasRoskop 输入验证留给提问者(OP)自己完成。为了通用性和简洁性,这里省略了它。@NathanOliver:没关系!所以你的 read_file_c 是我迄今为止尝试过的最快的函数,但它仍然耗时 100 秒,read_file_cpp 耗时 94.965 秒;代码完全相同,我也使用了你的文件生成器。在这种情况下,问题可能出在你那边,IO 不再是瓶颈。所以你的 read_file_c 是我迄今为止尝试过的最快的函数,但它仍然耗时 100 秒,read_file_cpp 耗时 94.965 秒;代码完全相同,我也使用了您的文件生成器。在这种情况下,问题可能出在您那边,IO 不再是瓶颈。