Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/130.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 高效解析修复消息c++;_C++_Performance_Fix Protocol - Fatal编程技术网

C++ 高效解析 FIX 消息

C++ 高效解析修复消息c++;,c++,performance,fix-protocol,C++,Performance,Fix Protocol,我需要解析一个包含财务修复协议的文件。样本如下: 1128=99=24535=X49=CME75=2017040934=82452=2017040920070508394791460=201704092007050800000005799=10000000268=2279=0269=B48=900655=ESM783=23271=1473460731=100000005796=17263279=0269=C48=900655=ESM783=24271=2861528731=10000000579

我需要解析一个包含金融 FIX 协议消息的文件。样本如下:

1128=99=24535=X49=CME75=2017040934=82452=2017040920070508394791460=201704092007050800000005799=10000000268=2279=0269=B48=900655=ESM783=23271=1473460731=100000005796=17263279=0269=C48=900655=ESM783=24271=2861528731=100000005796=1726310=219
我的应用程序将加载许多文件,每个文件都包含数百万行历史数据,因此需要考虑性能

我在网上查阅了有关 FIX 解析的类似问题,并研究了 QuickFIX 库(特别是使用 FIX::Message(string) 来解析消息),但我的目标是获得比 QuickFIX 更高的吞吐量。

我为最常见的消息类型(市场数据增量刷新,Market Data Incremental Refresh)编写了一个原型,以查看我能达到的速度,但我对每秒 60000 条消息的结果(包括解析一个 300 万行的文件)很不满意。

这是我的第一个 C++ 应用程序,所以我估计我的方法有很多缺陷,如果能得到任何关于提高性能的建议,我将非常感激。当前流程为:文件 -> 字符串 -> MDIncrementalRefresh。MDIncrementalRefresh 有两个可选的重复组,我使用 vector 来存储它们,因为它们的大小在不同消息之间是未知的。

我猜,与通过更新前一次MDIncrementalRefresh的内容重新使用对象相比,我在每次更新时重建MDIncrementalRefresh会导致不必要的开销

提前谢谢

#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

/**
 * Splits `s` into the substrings that lie between occurrences of `delimiter`.
 *
 * The result always holds at least one element: a string without any
 * delimiter yields itself, an empty string yields one empty token, and
 * adjacent, leading or trailing delimiters yield empty tokens.
 *
 * @param s          text to split; taken by const reference so that calling
 *                   this once per message and once per field does not copy
 *                   the input each time
 * @param delimiter  single character that separates tokens
 * @return the tokens in order of appearance, without the delimiters
 */
std::vector<std::string> string_split(const std::string& s, const char delimiter)
{
    std::vector<std::string> output;

    std::string::size_type start = 0;
    for (;;)
    {
        const std::string::size_type end = s.find(delimiter, start);

        if (end == std::string::npos)
        {
            // Last token: everything from `start` to the end of the string.
            output.emplace_back(s, start, s.size() - start);
            break;
        }

        // Construct the token directly inside the vector instead of building
        // a temporary with substr() and moving it in.
        output.emplace_back(s, start, end - start);
        start = end + 1;
    }

    return output;
}

// Delimiters of the raw message text: fields are separated by the SOH
// character (0x01) and, inside a field, the key is separated from its value
// by '='.
const char FIX_FIELD_DELIMITER = '\x01';
const char FIX_KEY_DELIMITER = '=';

// Index of the first character of a value string; used to read values that
// are stored as a single char.
const int STR_TO_CHAR = 0;
// Positions of the key and the value in the vector obtained by splitting one
// "key=value" field on FIX_KEY_DELIMITER.
const int KEY = 0;
const int VALUE = 1;

// Tag numbers of the fields the parser recognises. They are kept as strings
// so they can be compared directly with the key text split out of a field.
const string Field_TransactTime = "60";
const string Field_MatchEventIndicator = "5799";
const string Field_NoMDEntries = "268";
const string Field_MDUpdateAction = "279";
const string Field_MDEntryType = "269";
const string Field_SecurityID = "48";
const string Field_RptSeq = "83";
const string Field_MDEntryPx = "270";
const string Field_MDEntrySize = "271";
const string Field_NumberOfOrders = "346";
const string Field_MDPriceLevel = "1023";
const string Field_OpenCloseSettlFlag = "286";
const string Field_AggressorSide = "5797";
const string Field_TradingReferenceDate = "5796";
const string Field_HighLimitPrice = "1149";
const string Field_LowLimitPrice = "1148";
const string Field_MaxPriceVariation = "1143";
const string Field_ApplID = "1180";
const string Field_NoOrderIDEntries = "37705";
const string Field_OrderID = "37";
const string Field_LastQty = "32";
const string Field_SettlPriceType= "731";

// One element of the order-ID repeating group of a parsed message.
// Every member has a defined default so that a default-constructed entry
// never exposes an indeterminate value.
class OrderIdEntry {
public:
    std::string OrderID;  // text of the OrderID field (tag 37)
    int LastQty = 0;      // value of the LastQty field (tag 32); 0 until one is parsed
};

// One element of the market-data-entry repeating group of a parsed message.
//
// All members carry an in-class default: a field that never appears in the
// message is then reported as 0 / '\0' / empty instead of being left
// indeterminate when the entry is default-initialised (`MDEntry e;`).
struct MDEntry {
    char MDUpdateAction = '\0';        // first character of tag 279
    char MDEntryType = '\0';           // first character of tag 269
    int SecurityID = 0;                // tag 48
    int RptSeq = 0;                    // tag 83
    double MDEntryPx = 0.0;            // tag 270
    int MDEntrySize = 0;               // tag 271
    int NumberOfOrders = 0;            // tag 346
    int MDPriceLevel = 0;              // tag 1023
    int OpenCloseSettlFlag = 0;        // tag 286
    std::string SettlPriceType;        // tag 731, kept as text
    int AggressorSide = 0;             // tag 5797
    std::string TradingReferenceDate;  // tag 5796, kept as text
    double HighLimitPrice = 0.0;       // tag 1149
    double LowLimitPrice = 0.0;        // tag 1148
    double MaxPriceVariation = 0.0;    // tag 1143
    int ApplID = 0;                    // tag 1180

};

class MDIncrementalRefresh {

public:
    string TransactTime;
    string MatchEventIndicator;
    int NoMDEntries;
    int NoOrderIDEntries = 0;
    vector<MDEntry> MDEntries;
    vector<OrderIdEntry> OrderIdEntries;

    MDIncrementalRefresh(const string& message)
    {

        MDEntry* currentMDEntry = nullptr;
        OrderIdEntry* currentOrderIDEntry = nullptr;

        for (auto fields : string_split(message, FIX_FIELD_DELIMITER))
        {
            vector<string> kv = string_split(fields, FIX_KEY_DELIMITER);

            // Header :: MDIncrementalRefresh

            if (kv[KEY] == Field_TransactTime) this->TransactTime = kv[VALUE];

            else if (kv[KEY] == Field_MatchEventIndicator) this->MatchEventIndicator = kv[VALUE];
            else if (kv[KEY] == Field_NoMDEntries) this->NoMDEntries = stoi(kv[VALUE]);
            else if (kv[KEY] == Field_NoOrderIDEntries) this->NoOrderIDEntries = stoi(kv[VALUE]);

            // Repeating Group :: MDEntry

            else if (kv[KEY] == Field_MDUpdateAction)
            {
                MDEntries.push_back(MDEntry());
                currentMDEntry = &MDEntries.back(); // use pointer for fast lookup on subsequent repeating group fields
                currentMDEntry->MDUpdateAction = kv[VALUE][STR_TO_CHAR];
            }
            else if (kv[KEY] == Field_MDEntryType) currentMDEntry->MDEntryType = kv[VALUE][STR_TO_CHAR];
            else if (kv[KEY] == Field_SecurityID) currentMDEntry->SecurityID = stoi(kv[VALUE]);
            else if (kv[KEY] == Field_RptSeq) currentMDEntry->RptSeq = stoi(kv[VALUE]);
            else if (kv[KEY] == Field_MDEntryPx) currentMDEntry->MDEntryPx = stod(kv[VALUE]);
            else if (kv[KEY] == Field_MDEntrySize) currentMDEntry->MDEntrySize = stoi(kv[VALUE]);
            else if (kv[KEY] == Field_NumberOfOrders) currentMDEntry->NumberOfOrders = stoi(kv[VALUE]);
            else if (kv[KEY] == Field_MDPriceLevel) currentMDEntry->MDPriceLevel = stoi(kv[VALUE]);
            else if (kv[KEY] == Field_OpenCloseSettlFlag) currentMDEntry->OpenCloseSettlFlag = stoi(kv[VALUE]);
            else if (kv[KEY] == Field_SettlPriceType) currentMDEntry->SettlPriceType= kv[VALUE];
            else if (kv[KEY] == Field_AggressorSide) currentMDEntry->AggressorSide = stoi(kv[VALUE]);
            else if (kv[KEY] == Field_TradingReferenceDate) currentMDEntry->TradingReferenceDate = kv[VALUE];
            else if (kv[KEY] == Field_HighLimitPrice) currentMDEntry->HighLimitPrice = stod(kv[VALUE]);
            else if (kv[KEY] == Field_LowLimitPrice) currentMDEntry->LowLimitPrice = stod(kv[VALUE]);
            else if (kv[KEY] == Field_MaxPriceVariation) currentMDEntry->MaxPriceVariation = stod(kv[VALUE]);
            else if (kv[KEY] == Field_ApplID) currentMDEntry->ApplID = stoi(kv[VALUE]);

            // Repeating Group :: OrderIDEntry
            else if (kv[KEY] == Field_OrderID) {
                OrderIdEntries.push_back(OrderIdEntry());
                currentOrderIDEntry = &OrderIdEntries.back();
                currentOrderIDEntry->OrderID = kv[VALUE];
            }

            else if (kv[KEY] == Field_LastQty) currentOrderIDEntry->LastQty = stol(kv[VALUE]);
        }
    }


};

int main() {

    //std::string filename = "test/sample";

    std::string line;
    std::ifstream file (filename);

    int count = 0;
    if (file.is_open())
    {
        while ( std::getline( file, line ) )
        {
            MDIncrementalRefresh md(line);
            if (md.TransactTime != "") {
                count++;
            }
        }
        file.close();
    }
    cout << count << endl;
    return 0;
}
#包括
#包括
#包括
#包括
使用名称空间std;
std::vector string_split(std::string s,常量字符分隔符)
{
大小\u t开始=0;
size\u t end=s.find\u first\u of(分隔符);
std::矢量输出;
而(结束时间=千伏[值];
否则,如果(kv[键]==字段_MatchEventIndicator)此->MatchEventIndicator=kv[值];
否则,如果(kv[键]==字段\u NoMDEntries)此->NoMDEntries=stoi(kv[值]);
否则,如果(kv[键]==字段\u NoOrderIDEntries)此->NoOrderIDEntries=stoi(kv[值]);
//重复组::MDEntry
else if(kv[键]==字段\u MDUpdateAction)
{
MDEntry.push_back(MDEntry());
currentMDEntry=&MDEntries.back();//使用指针快速查找后续重复组字段
currentMDEntry->MDUpdateAction=kv[VALUE][STR_TO_CHAR];
}
否则,如果(kv[KEY]==Field\u MDEntryType)currentmentry->MDEntryType=kv[VALUE][STR\u TO\u CHAR];
否则,如果(kv[键]==字段_SecurityID)currentMDEntry->SecurityID=stoi(kv[值]);
否则,如果(kv[键]==字段_RptSeq)currentMDEntry->RptSeq=stoi(kv[值]);
否则,如果(kv[键]==Field\u MDEntryPx)currentMDEntry->MDEntryPx=stod(kv[值]);
否则,如果(kv[键]==字段\u MDEntrySize)currentMDEntry->MDEntrySize=stoi(kv[值]);
否则,如果(kv[键]==字段_NumberOfOrders)currentMDEntry->NumberOfOrders=stoi(kv[值]);
否则,如果(kv[键]==字段\MDPriceLevel)currentMDEntry->MDPriceLevel=stoi(kv[值]);
否则,如果(kv[键]==字段_OpenCloseSettleFlag)currentMDEntry->OpenCloseSettleFlag=stoi(kv[值]);
否则,如果(kv[键]==字段_结算价格类型)currentMDEntry->SettlePriceType=kv[值];
否则,如果(kv[键]==字段\攻击者端)currentMDEntry->Accessor端=stoi(kv[值]);
如果(kv[键]==字段\交易参考日期)currentMDEntry->TradingReferenceDate=kv[值];
否则,如果(kv[键]==Field_HighLimitPrice)currentMDEntry->HighLimitPrice=stod(kv[值]);
否则,如果(kv[键]==字段\u下限价格)currentMDEntry->LowLimitPrice=stod(kv[值]);
否则,如果(kv[键]==字段_MaxPriceVariation)currentMDEntry->MaxPriceVariation=stod(kv[值]);
否则,如果(kv[键]==字段\应用ID)currentMDEntry->ApplID=stoi(kv[值]);
//重复组::OrderIdentity
else if(kv[键]==字段\u订单ID){
orderIdentity.push_back(orderIdentity());
CurrentOrderIdentity=&OrderIdentity.back();
CurrentOrderIdentity->OrderID=kv[值];
}
如果(kv[键]==字段\最后数量)CurrentOrderIdentity->LastQty=stol(kv[值]),则为else;
}
}
};
int main(){
//std::string filename=“测试/样本”;
std::字符串行;
std::ifstream文件(文件名);
整数计数=0;
if(file.is_open())
{
while(std::getline(文件,行))
{
MDIncrementalRefresh md(行);
如果(md.transact时间!=“”){
计数++;
}
}
file.close();
}

对于那些感兴趣的人来说:上述代码的大部分处理时间都花在 string_split 函数中。对 string_split 的大量调用导致在堆上执行许多(昂贵的)内存分配。


另一种实现 string_split_optim 重用预先分配的向量,从而避免每次调用 string_split 时不必要的堆分配/扩容。下面运行 150 万次迭代的示例表明速度提高了约 3.4 倍。由于 vector.clear() 本身不会释放向量已分配的内存,后续对 string_split_optim 的调用只要得到的向量大小不超过已有容量,就不需要再为向量本身分配内存。(评论)你说“这是我的第一个 C++ 应用程序”,却从一开始就执着于吞吐量。(回复)感谢你抽出时间来看我的问题。虽然我提到这是我第一次使用 C++,但我并没有说这是我第一次编写软件。因此我完全有能力解决问题,只是希望得到一些有益的指导,帮助我更好地理解潜在的瓶颈(例如重复调用 string_split 可能会隐式地引发堆分配)。
#include <string>
#include <vector>

// Splits `s` on `delimiter` and stores the tokens in `output`.
//
// `output` is cleared first and then refilled, so a caller can pass the same
// vector to many calls instead of receiving a new one each time. The token
// list matches what splitting produces in general: at least one token, with
// empty tokens for adjacent, leading or trailing delimiters.
void string_split_optim(std::vector<std::string>& output, const std::string &s, const char delimiter)
{
    output.clear();

    std::string::size_type tokenBegin = 0;
    for (;;)
    {
        const std::string::size_type tokenEnd = s.find_first_of(delimiter, tokenBegin);
        const bool isLastToken = (tokenEnd == std::string::npos);

        // For the last token the length is oversized on purpose; substr()
        // clamps it to the end of the string.
        output.emplace_back(s.substr(tokenBegin, tokenEnd - tokenBegin));

        if (isLastToken)
            return;

        tokenBegin = tokenEnd + 1;
    }
}


// Micro-benchmark: splits the same sample message NUM_RUNS times with
// string_split (returns a new vector per call) and then with
// string_split_optim (refills one caller-owned vector), printing the CPU time
// measured with clock() for each variant.
int main()
{
    const int NUM_RUNS = 1500000;
    const std::string s = "1128=9\u00019=174\u000135=X\u000149=CME\u000175=20170403\u000134=1061\u000152=20170402211926965794928\u000160=20170402211926965423233\u00015799=10000100\u0001268=1\u0001279=1\u0001269=1\u000148=9006\u000155=ESM7\u000183=118\u0001270=236025.0\u0001271=95\u0001346=6\u00011023=9\u000110=088\u0001";

    std::vector<std::string> vec;

    // Variant 1: every call builds and returns its own vector.
    const clock_t standardStart = clock();
    int run = 0;
    while (run < NUM_RUNS)
    {
        vec = string_split(s, '=');
        ++run;
    }
    const double standardSeconds = (double) (clock() - standardStart) / CLOCKS_PER_SEC;
    printf("Time taken: %.2fs\n", standardSeconds);

    // Variant 2: the same vector is handed in and refilled on every call.
    const clock_t reusedStart = clock();
    run = 0;
    while (run < NUM_RUNS)
    {
        string_split_optim(vec, s, '=');
        vec.clear();
        ++run;
    }
    const double reusedSeconds = (double) (clock() - reusedStart) / CLOCKS_PER_SEC;
    printf("Time taken: %.2fs\n", reusedSeconds);
}
Time taken: 6.60s
Time taken: 1.94s