C++ 解析数字串_C++_String_Parsing_C++11_Numbers

C++ 解析数字串

c++ string parsing c++11

C++ 解析数字串,c++,string,parsing,c++11,numbers,C++,String,Parsing,C++11,Numbers,我需要解析几个C风格的字符串（大约500k），其中包含由单个空格字符分隔的4个浮点数。以下是单个字符串的示例： “90292 5879 89042.2576 5879” 我需要将这些数字存储在两个代表两点的结构中。考虑到字符串可以在解析时修改，并且99.99%的数字都是无符号整数，那么最快的方法是什么以下是我目前的执行情况： #include <iostream> #include <cassert> #include <chrono> #include &

我需要解析几个C风格的字符串（大约500k），其中包含由单个空格字符分隔的4个浮点数。以下是单个字符串的示例：

“90292 5879 89042.2576 5879”

我需要将这些数字存储在两个代表两点的结构中。考虑到字符串可以在解析时修改，并且99.99%的数字都是无符号整数，那么最快的方法是什么？

以下是我目前的实现：

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
using namespace chrono;



// Plain aggregate holding two single-precision values named x and y.
// parse_points() in this file fills two of these from one input line.
struct PointF
{
    float x;
    float y;
};


// Parses a line of four numbers separated by single spaces, e.g.
// "90292 5879 89042.2576 5879", into p1.x, p1.y, p2.x, p2.y (in that order).
//
// points  Mutable, null-terminated buffer. The three separating spaces are
//         overwritten with '\0' while parsing, so the buffer is modified.
// p1, p2  Output parameters; every field is always assigned.
//
// Debug builds assert that exactly three separators are present. Builds with
// NDEBUG no longer read past the end of a short line: fields that are missing
// (or all fields when `points` is null) are set to 0.0f.
void parse_points(char* points, PointF& p1, PointF& p2)
{
    float* const fields[] = { &p1.x, &p1.y, &p2.x, &p2.y };
    const std::size_t field_count = sizeof(fields) / sizeof(fields[0]);

    char* token = points;
    for (std::size_t i = 0; i < field_count; ++i)
    {
        if (token == nullptr)
        {
            // The input ended early (or was null): nothing left to parse.
            *fields[i] = 0.0f;
            continue;
        }

        char* const sep = std::strchr(token, ' ');
        const bool is_last = (i + 1 == field_count);

        // Every field but the last must be followed by a space, and the last
        // field must not be followed by one.
        assert((sep != nullptr) != is_last);

        // Terminate the token in place so atof() stops at the field boundary.
        // The last field is left untouched; atof() stops at the first
        // character that cannot belong to a number anyway.
        if (sep != nullptr && !is_last)
            *sep = '\0';

        *fields[i] = static_cast<float>(std::atof(token));

        token = (sep != nullptr) ? sep + 1 : nullptr;
    }
}



// Micro-benchmark driver: builds n copies of a sample line, runs
// parse_points() over every copy, prints the elapsed time in milliseconds
// and then waits for a key press.
int main()
{
    const auto n = 500000;
    char points_str[] = "90292 5879 89042.2576 5879";
    PointF p1, p2;

    // Every element owns its own copy because parse_points() writes into the
    // buffer it is given.
    std::vector<std::string> data(n, points_str);

    // steady_clock is monotonic; system_clock can be adjusted while the loop
    // runs and would then report a wrong interval.
    const auto t0 = std::chrono::steady_clock::now();

    // &s[0] points at the string's own mutable, null-terminated storage.
    // Writing through const_cast<char*>(s.c_str()) is undefined behaviour.
    for (auto& s : data)
        parse_points(&s[0], p1, p2);

    const auto t1 = std::chrono::steady_clock::now();
    const auto elapsed =
        std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();

    std::cout << "Elapsed: " << elapsed << " ms" << std::endl;

    std::cin.get();
    return 0;
}

#包括
#包括
#包括
#包括
#包括
#包括
使用名称空间std；
使用名称空间计时；
结构点F
{
浮动x；
浮动y；
};
无效解析点（字符*点、点F&p1、点F&p2）
{
自动启动=点；
常量自动结束=开始+strlen（点）；
//p1.x
start=std:：find（开始，结束“”）；
断言（开始<结束）；
*开始='\0'；
p1.x=静态_投射（atof（点））；
积分=起点+1；
//p1.y
start=std:：find（开始，结束“”）；
断言（开始<结束）；
*开始='\0'；
p1.y=静态_投射（atof（点））；
积分=起点+1；
//p2.x
start=std:：find（开始，结束“”）；
断言（开始<结束）；
*开始='\0'；
p2.x=静态_投射（atof（点））；
积分=起点+1；
//p2.y
start=std:：find（开始，结束“”）；
断言（开始==结束）；
p2.y=静态_投射（atof（点））；
}
int main（）
{
常数自动n=500000；
char points_str[]=“90292 5879 89042.2576 5879”；
点f p1，p2；
矢量数据（n）；
用于（自动和s:数据）
s、 分配（点数）；
const auto t0=系统时钟：：现在（）；
用于（自动i=0；icout
给定一个以空格分隔的浮点数值字符串：
// Sample line: four space-separated numbers, one of them with a fractional part.
const std::string example_input{"90292 5879 89042.2576 5879"};

您应该进行性能分析（profile），看看哪种方式更快。直接按浮点数读取：
std::istringstream text_stream(example_input);
std::vector<double> container;
double value;
while (text_stream >> value)
{
  container.push_back(value);
}

// Integer-first variant: read each number as an int and fall back to a
// floating-point read only when the integer is directly followed by '.',
// 'e' or 'E'.
// Limitation kept from the original: a token that does not start with an
// integer (".5") or that overflows int ends the loop.
std::istringstream text_stream(example_input);
std::vector<double> container;
double value;
signed int int_value;
std::streampos position_before_read = text_stream.tellg();
while (text_stream >> int_value)
{
  // Inspect the next character WITHOUT consuming it. Extracting it with
  // `>> c` skips the separating space and swallows the first digit of the
  // following number; at end of input it also leaves `c` unset.
  switch (text_stream.peek())
  {
    case '.':
    case 'E': case 'e':
      // Rewind to before the number and read it again as floating point.
      text_stream.seekg(position_before_read);
      text_stream >> value;
      break;
    default:
      // Plain integer (or end of input): just widen it.
      value = 1.0 * int_value;
      break;
  }
  container.push_back(value);
  position_before_read = text_stream.tellg();
}

std:：istringstream文本流（示例输入）；
载体容器；
双重价值；
while（文本流>>值）
{
容器。推回（值）；
}

或者先按整数读取，只有在遇到浮点标志（小数点或指数）时才会有额外的性能开销：
std::istringstream text_stream(example_input);
std::vector<double> container;
double value;
while (text_stream >> value)
{
  container.push_back(value);
}

// Integer-first variant: read each number as an int and fall back to a
// floating-point read only when the integer is directly followed by '.',
// 'e' or 'E'.
// Limitation kept from the original: a token that does not start with an
// integer (".5") or that overflows int ends the loop.
std::istringstream text_stream(example_input);
std::vector<double> container;
double value;
signed int int_value;
std::streampos position_before_read = text_stream.tellg();
while (text_stream >> int_value)
{
  // Inspect the next character WITHOUT consuming it. Extracting it with
  // `>> c` skips the separating space and swallows the first digit of the
  // following number; at end of input it also leaves `c` unset.
  switch (text_stream.peek())
  {
    case '.':
    case 'E': case 'e':
      // Rewind to before the number and read it again as floating point.
      text_stream.seekg(position_before_read);
      text_stream >> value;
      break;
    default:
      // Plain integer (or end of input): just widen it.
      value = 1.0 * int_value;
      break;
  }
  container.push_back(value);
  position_before_read = text_stream.tellg();
}

std:：istringstream文本流（示例输入）；
载体容器；
双重价值；
符号整数值；
std:：streampos position_before_read=text_stream.tellg（）；
while（文本流>>整数值）
{
//检查下一个字符是否存在可能的浮点差异。
字符c；
文本流>>c；
开关（c）
{
案例“”：
案例“E”：案例“E”：
//倒带到数字之前并读取为浮点
文本流。参见kg（读取前的位置）；
文本流>>值；
打破
违约：
值=1.0*int_值；
打破
}
容器。推回（值）；
将_置于_read=text_stream.tellg（）之前；
}

我的猜测是，标准库经过优化，可以读取浮点值，这比上面的示例要好得多，并且可以解释浮点格式的所有差异
注意：或者，您可以将小数和指数读取为整数（如果存在），然后使用所有三个部分构建一个浮点值。
您可以自己实现一个 atof，让它同时返回空格所在的位置。
这样，您只需遍历每个字符串一次。
例如
我发现代码中存在多个问题（您的提问实际上很好）：

当有数字时，案例没有错误处理（注意：根据讨论，在这种情况下，您预期为0）
创建PointF对象两次，以便能够传递它们

您将它们作为引用传递，因此对于阅读调用代码的人来说，很难一眼看出它们是输出（out）参数

您创建的解析器在C语言中可用（尽管您可能会测量它是快还是慢）

我建议：（注意，这里的 std::experimental::optional 等同于 boost::optional）
#包括
#包括
#包括
#包括
struct PointF
{
    float x;
    float y;
};

std::experimental::optional<std::pair<PointF, PointF>> parse_points(char* pch)
{
    pch = strtok(pch, " ");
    if (pch != NULL)
    {
        float x0 = atof(pch);
        pch = strtok(NULL, " ");
        if (pch != NULL)
        {
            float y0 = atof(pch);
            pch = strtok(NULL, " ");
            if (pch != NULL)
            {
                float x1 = atof(pch);
                pch = strtok(NULL, " ");
                if (pch != NULL)
                {
                    float y1 = atof(pch);
                    PointF p0{x0, y0}, p1{x1, y1};
                    return std::make_pair(p0, p1);
                }
            }
        }
    }
    return std::experimental::nullopt;
}
int main（）{
const char str[]=“90292 5879 89042.2576 5879”；
char*pch0=新字符[sizeof（str）]，*pch=pch0；
memcpy（pch0，str，sizeof（str））；
std:：实验：：可选pOpt（解析点（pch0））；
如果（pOpt）
std:：cout first.x这是我的版本，没有strlen
，但是使用了 strtok_s。
在我的机器上，它耗时 1.1 秒，
而不是 1.5 秒。

// Splits the next space-delimited token off `cursor`.
// Leading spaces are skipped, the space that ends the token is overwritten
// with '\0', and `cursor` is advanced past it. Returns nullptr once no token
// is left (or when `cursor` is null).
static char* next_space_token(char*& cursor)
{
    if (cursor == nullptr)
        return nullptr;

    cursor += std::strspn(cursor, " ");
    if (*cursor == '\0')
        return nullptr;

    char* const token = cursor;
    cursor += std::strcspn(cursor, " ");
    if (*cursor != '\0')
        *cursor++ = '\0';
    return token;
}

// Parses up to four space-separated numbers from `points` into p1.x, p1.y,
// p2.x, p2.y (in that order). A field whose token is missing is set to 0.0f.
//
// points  Mutable, null-terminated buffer; token separators are overwritten
//         with '\0' while parsing. A null pointer yields four zeros.
// p1, p2  Output parameters; every field is always assigned.
//
// Tokenising uses only standard <cstring> functions instead of the
// three-argument strtok_s, which is specific to the Microsoft C runtime.
void parse_points(char* points, PointF& p1, PointF& p2)
{
    float* const fields[] = { &p1.x, &p1.y, &p2.x, &p2.y };

    char* cursor = points;
    for (float* field : fields)
    {
        const char* const token = next_space_token(cursor);
        *field = token ? static_cast<float>(std::atof(token)) : 0.0f;
    }
}



// Micro-benchmark driver: builds n copies of a sample line, runs
// parse_points() over every copy and prints the elapsed time in milliseconds.
int main()
{
    const auto n = 500000;
    char points_str[] = "90292 5879 89042.2576 5879";
    PointF p1, p2;

    // Every element owns its own copy because parse_points() writes into the
    // buffer it is given.
    std::vector<std::string> data(n, points_str);

    // steady_clock is monotonic; system_clock can be adjusted while the loop
    // runs and would then report a wrong interval.
    const auto t0 = std::chrono::steady_clock::now();

    // &s[0] points at the string's own mutable, null-terminated storage.
    // Writing through const_cast<char*>(s.c_str()) is undefined behaviour.
    for (auto& s : data)
        parse_points(&s[0], p1, p2);

    const auto t1 = std::chrono::steady_clock::now();
    const auto elapsed =
        std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();

    std::cout << "Elapsed: " << elapsed << " ms" << std::endl;

    //cin.get();
    return 0;
}

void parse_点（字符*点、点F&p1、点F&p2）
{
char*next_token1=nullptr；
//p1.x
点数=strtok（点数，“，&next_标记1）；
p1.x=点？静态投影（atof（点））：0.0f；
//p1.y
点数=strtok_s（nullptr，“，&next_token1）；
p1.y=点？静态投影（atof（点））：0.0f；
//p2.x
点数=strtok_s（nullptr，“，&next_token1）；
p2.x=点？静态投影（atof（点））：0.0f；
//p2.y
点数=strtok_s（nullptr，“，&next_token1）；
p2.y=点？静态投影（atof（点））：0.0f；
}
int main（）
{
常数自动n=500000；
char points_str[]=“90292 5879 89042.2576 5879”；
点f p1，p2；
矢量数据（n）；
用于（自动和s:数据）
s