高效解析FIX消息(C++)
我需要解析一個包含金融FIX協議消息的文件,因此需要考慮性能。示例如下:
1128=9|9=245|35=X|49=CME|75=20170409|34=824|52=20170409200705083947914|60=20170409200705080000000|5799=10000000|268=2|279=0|269=B|48=9006|55=ESM7|83=23|271=1473460|731=10000000|5796=17263|279=0|269=C|48=9006|55=ESM7|83=24|271=2861528|731=10000000|5796=17263|10=219|
(爲便於閱讀,此處以「|」表示FIX字段分隔符SOH,即 \x01。)
我的應用程序將加載許多文件,每個文件都包含數以百萬計的歷史數據行。
我已經查閱了有關FIX解析的相關問題,並研究了QuickFIX庫(特別是使用 FIX::Message(string) 來解析消息),但我的目標是獲得比使用QuickFIX所能達到的更高的吞吐量。
我針對最常見的消息類型(市場數據增量刷新)寫了一個原型,以查看我能達到什麼樣的速度,結果並不理想:約60,000條消息/秒(包括文件讀取),測試文件有300萬行。
這是我的第一個C++應用程序,所以我預計我的方法中存在很多缺陷,任何關於如何改進其性能的建議我都將不勝感激。
目前的流程是 file -> string -> MDIncrementalRefresh。MDIncrementalRefresh有兩個可選的重複組,我使用vector來存儲它們,因爲它們的大小因消息而異,事先無法得知。
我猜測每次更新時都重新構造MDIncrementalRefresh會導致不必要的開銷;相比之下,如果通過更新上一個MDIncrementalRefresh的內容來重用該對象,是否會更好?
提前致謝。
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
using namespace std;
/// Splits `s` on every occurrence of `delimiter`.
///
/// Empty tokens are preserved: a leading delimiter, a trailing delimiter, or
/// two adjacent delimiters each produce an empty string, and an empty input
/// produces a single empty string.
///
/// @param s          Text to split. Taken by const reference so the input is
///                   not copied on every call.
/// @param delimiter  Single character that separates tokens.
/// @return The tokens in order of appearance; always at least one element.
std::vector<std::string> string_split(const std::string& s, const char delimiter)
{
    std::vector<std::string> output;
    std::string::size_type start = 0;
    for (;;)
    {
        const std::string::size_type end = s.find(delimiter, start);
        if (end == std::string::npos)
        {
            // Last token: everything from `start` to the end of `s`
            // (empty when `s` is empty or ends with the delimiter).
            output.emplace_back(s, start, std::string::npos);
            break;
        }
        // Build the token directly inside the vector instead of creating a
        // substr() temporary first.
        output.emplace_back(s, start, end - start);
        start = end + 1;
    }
    return output;
}
// --- Message framing -------------------------------------------------------
// Character separating one "tag=value" field from the next (SOH, 0x01).
const char FIX_FIELD_DELIMITER{'\x01'};
// Character separating a field's tag from its value.
const char FIX_KEY_DELIMITER{'='};

// --- Indices into a split "tag=value" pair ---------------------------------
const int KEY{0};          // position of the tag
const int VALUE{1};        // position of the value
const int STR_TO_CHAR{0};  // index of the character taken from a value string

// --- Tags read at message level --------------------------------------------
const std::string Field_TransactTime{"60"};
const std::string Field_MatchEventIndicator{"5799"};
const std::string Field_NoMDEntries{"268"};
const std::string Field_NoOrderIDEntries{"37705"};

// --- Tags read into an MDEntry ---------------------------------------------
const std::string Field_MDUpdateAction{"279"};
const std::string Field_MDEntryType{"269"};
const std::string Field_SecurityID{"48"};
const std::string Field_RptSeq{"83"};
const std::string Field_MDEntryPx{"270"};
const std::string Field_MDEntrySize{"271"};
const std::string Field_NumberOfOrders{"346"};
const std::string Field_MDPriceLevel{"1023"};
const std::string Field_OpenCloseSettlFlag{"286"};
const std::string Field_SettlPriceType{"731"};
const std::string Field_AggressorSide{"5797"};
const std::string Field_TradingReferenceDate{"5796"};
const std::string Field_HighLimitPrice{"1149"};
const std::string Field_LowLimitPrice{"1148"};
const std::string Field_MaxPriceVariation{"1143"};
const std::string Field_ApplID{"1180"};

// --- Tags read into an OrderIdEntry ----------------------------------------
const std::string Field_OrderID{"37"};
const std::string Field_LastQty{"32"};
/// One entry of the OrderID repeating group: the value of tag 37 (OrderID)
/// and the value of tag 32 (LastQty) that follows it.
class OrderIdEntry {
public:
    std::string OrderID;  // stored verbatim as text
    int LastQty = 0;      // defaulted so an entry without tag 32 is never indeterminate
};
/// One entry of the market-data repeating group (opened by tag 279).
///
/// Every member has a default so that an entry is fully defined no matter how
/// it is constructed or which optional tags the message omitted.
struct MDEntry {
    char MDUpdateAction = '\0';             // first character of tag 279
    char MDEntryType = '\0';                // first character of tag 269
    int SecurityID = 0;                     // tag 48
    int RptSeq = 0;                         // tag 83
    double MDEntryPx = 0.0;                 // tag 270
    int MDEntrySize = 0;                    // tag 271
    int NumberOfOrders = 0;                 // tag 346
    int MDPriceLevel = 0;                   // tag 1023
    int OpenCloseSettlFlag = 0;             // tag 286
    std::string SettlPriceType = "";        // tag 731, stored verbatim
    int AggressorSide = 0;                  // tag 5797
    std::string TradingReferenceDate = "";  // tag 5796, stored verbatim
    double HighLimitPrice = 0.0;            // tag 1149
    double LowLimitPrice = 0.0;             // tag 1148
    double MaxPriceVariation = 0.0;         // tag 1143
    int ApplID = 0;                         // tag 1180
};
class MDIncrementalRefresh {
public:
string TransactTime;
string MatchEventIndicator;
int NoMDEntries;
int NoOrderIDEntries = 0;
vector<MDEntry> MDEntries;
vector<OrderIdEntry> OrderIdEntries;
MDIncrementalRefresh(const string& message)
{
MDEntry* currentMDEntry = nullptr;
OrderIdEntry* currentOrderIDEntry = nullptr;
for (auto fields : string_split(message, FIX_FIELD_DELIMITER))
{
vector<string> kv = string_split(fields, FIX_KEY_DELIMITER);
// Header :: MDIncrementalRefresh
if (kv[KEY] == Field_TransactTime) this->TransactTime = kv[VALUE];
else if (kv[KEY] == Field_MatchEventIndicator) this->MatchEventIndicator = kv[VALUE];
else if (kv[KEY] == Field_NoMDEntries) this->NoMDEntries = stoi(kv[VALUE]);
else if (kv[KEY] == Field_NoOrderIDEntries) this->NoOrderIDEntries = stoi(kv[VALUE]);
// Repeating Group :: MDEntry
else if (kv[KEY] == Field_MDUpdateAction)
{
MDEntries.push_back(MDEntry());
currentMDEntry = &MDEntries.back(); // use pointer for fast lookup on subsequent repeating group fields
currentMDEntry->MDUpdateAction = kv[VALUE][STR_TO_CHAR];
}
else if (kv[KEY] == Field_MDEntryType) currentMDEntry->MDEntryType = kv[VALUE][STR_TO_CHAR];
else if (kv[KEY] == Field_SecurityID) currentMDEntry->SecurityID = stoi(kv[VALUE]);
else if (kv[KEY] == Field_RptSeq) currentMDEntry->RptSeq = stoi(kv[VALUE]);
else if (kv[KEY] == Field_MDEntryPx) currentMDEntry->MDEntryPx = stod(kv[VALUE]);
else if (kv[KEY] == Field_MDEntrySize) currentMDEntry->MDEntrySize = stoi(kv[VALUE]);
else if (kv[KEY] == Field_NumberOfOrders) currentMDEntry->NumberOfOrders = stoi(kv[VALUE]);
else if (kv[KEY] == Field_MDPriceLevel) currentMDEntry->MDPriceLevel = stoi(kv[VALUE]);
else if (kv[KEY] == Field_OpenCloseSettlFlag) currentMDEntry->OpenCloseSettlFlag = stoi(kv[VALUE]);
else if (kv[KEY] == Field_SettlPriceType) currentMDEntry->SettlPriceType= kv[VALUE];
else if (kv[KEY] == Field_AggressorSide) currentMDEntry->AggressorSide = stoi(kv[VALUE]);
else if (kv[KEY] == Field_TradingReferenceDate) currentMDEntry->TradingReferenceDate = kv[VALUE];
else if (kv[KEY] == Field_HighLimitPrice) currentMDEntry->HighLimitPrice = stod(kv[VALUE]);
else if (kv[KEY] == Field_LowLimitPrice) currentMDEntry->LowLimitPrice = stod(kv[VALUE]);
else if (kv[KEY] == Field_MaxPriceVariation) currentMDEntry->MaxPriceVariation = stod(kv[VALUE]);
else if (kv[KEY] == Field_ApplID) currentMDEntry->ApplID = stoi(kv[VALUE]);
// Repeating Group :: OrderIDEntry
else if (kv[KEY] == Field_OrderID) {
OrderIdEntries.push_back(OrderIdEntry());
currentOrderIDEntry = &OrderIdEntries.back();
currentOrderIDEntry->OrderID = kv[VALUE];
}
else if (kv[KEY] == Field_LastQty) currentOrderIDEntry->LastQty = stol(kv[VALUE]);
}
}
};
int main() {
//std::string filename = "test/sample";
std::string line;
std::ifstream file (filename);
int count = 0;
if (file.is_open())
{
while (std::getline(file, line))
{
MDIncrementalRefresh md(line);
if (md.TransactTime != "") {
count++;
}
}
file.close();
}
cout << count << endl;
return 0;
}
「這是我的第一個C++應用程序」,而你卻從一開始就執著於吞吐量。先寫出能夠完成工作的代碼,而不是一開始就糾結於效率問題。如果沒有性能分析器(profiler),你在優化時會走錯方向。 – DumbCoder
@DumbCoder 感謝您花時間審閱我的問題。雖然我提到這是我的第一個C++應用程序,但我並沒有說這是我第一次編寫軟件。因此,我完全有能力寫出可行的解決方案,只是希望得到一些關於如何最好地分析和了解潛在瓶頸的有用指導(例如,重複調用string_split可能會隱式地觸發額外的堆分配)。 – awaugh