我有一個用 C++ 實現的隨機森林,通過 mex 在 MATLAB 中運行。程序一直運行正常,直到執行到下面這個函數時卡住,並開始不斷消耗內存,直到計算機死機。內存泄漏可能是由遞歸函數造成的:
void MyFunction(
const IDataPointCollection& data,
std::vector<std::vector<int> >& leafNodeIndices,
ProgressStream* progress=0) const
{
ProgressStream defaultProgressStream(std::cout, Interest);
progress = (progress==0)?&defaultProgressStream:progress;
leafNodeIndices.resize(TreeCount());
tbb::parallel_for<int>(0,TreeCount(),[&](int t)
{
leafNodeIndices[t].resize(data.Count());
(*progress)[Interest] << "\rApplying tree " << t << "...";
trees_[t]->Apply(data, leafNodeIndices[t]);
});
(*progress)[Interest] << "STUCK HERE" << std::endl;
return;
}
順著上面的 trees_[t]->Apply() 調用往下追蹤,
我把問題範圍縮小到了下面這個遞歸函數:
void ApplyNode(
int nodeIndex,
const IDataPointCollection& data,
std::vector<unsigned int>& dataIndices,
int i0,
int i1,
std::vector<int>& leafNodeIndices,
std::vector<float>& responses_)
{
std::cout<<"applying node"<<std::endl;
assert(nodes_[nodeIndex].IsNull()==false);
Node<F,S>& node = nodes_[nodeIndex];
if (node.IsLeaf())
{
for (int i = i0; i < i1; i++)
leafNodeIndices[dataIndices[i]] = nodeIndex;
return;
}
else if (i0 == i1) // No samples left
return;
else
{
for (int i = i0; i < i1; i++)
responses_[i] = node.Feature.GetResponse(data, dataIndices[i]);
int ii = Partition(responses_, dataIndices, i0, i1, node.Threshold);
// Recurse for child nodes.
ApplyNode(nodeIndex * 2 + 1, data, dataIndices, i0, ii, leafNodeIndices, responses_);
ApplyNode(nodeIndex * 2 + 2, data, dataIndices, ii, i1, leafNodeIndices, responses_);
return;
}
}
遞歸函數每次調用的計算時間各不相同,取決於 node.Feature.GetResponse()
函數。如果我(通過修改 GetResponse())
讓所有遞歸調用的計算時間相同,代碼就能順利運行。
// Computes this feature's response for the data point at `index`.
//
// `axis` is scaled by the number of features to pick a feature index (clamped
// to the last feature, with a warning, when it falls past the end). The
// selected feature is a vector of coordinates whose second entry's `x` picks
// the kind of response:
//   feature[1].x == 0 : dot product of the point's original position with
//                       feature[0];
//   feature[1].x == 2 : value of the point's feature image at feature[0];
//   otherwise         : mean() of the point's image, called with
//                       (position + feature[0], feature[1]).
//
// data  - must actually be a DataManager; it is downcast without a check.
// index - index of the data point inside `data`.
// Returns the response narrowed to float.
//
// Each image is fetched only inside the branch that uses it. Fetching both
// up front meant two by-value Image locals per call, even on the dot-product
// path that reads neither; this function runs once per data point per node.
float AxisAlignedFeatureResponse::GetResponse(const IDataPointCollection& data, int index) const {
    // Unchecked downcast, kept in its original form because the class
    // hierarchy is not visible from here.
    const DataManager& concreteData = (const DataManager&)(data);

    // Data point and its original position.
    DataPoint currDataPoint = concreteData.getDataPoint(index);
    Coordinate currCoordinates = currDataPoint.getOrigPos();

    // Map `axis` onto a feature index; clamp to the last feature if it
    // overshoots. The count is queried once and reused.
    const auto featureCount = concreteData.getNumberOfFeatures();
    int featureNumber = static_cast<int>(this->axis * static_cast<double>(featureCount));
    if (featureNumber >= featureCount) {
        std::cout << "warning! trying to reach a feature that is not there!" << std::endl;
        featureNumber = featureCount - 1;
    }
    std::vector<Coordinate> feature = concreteData.getFeature(featureNumber);

    double retArg;
    if (feature[1].x == 0) {
        // Position-only response: no image is needed on this path.
        retArg = currCoordinates.x*feature[0].x
               + currCoordinates.y*feature[0].y
               + currCoordinates.z*feature[0].z;
    }
    else if (feature[1].x == 2) {
        // NOTE(review): if getFeatureImage() returns a reference and
        // Image::getValue is const, bind with `const Image&` to avoid the
        // copy entirely - confirm against the Image class.
        Image currFeatureImg = concreteData.getFeatureImage(currDataPoint.getImageIndex());
        retArg = currFeatureImg.getValue(feature[0]);
    }
    else {
        // NOTE(review): same as above for getImage() / Image::mean.
        Image currImg = concreteData.getImage(currDataPoint.getImageIndex());
        Coordinate tmp = currCoordinates + feature[0];
        retArg = currImg.mean(tmp, feature[1]);
    }
    return static_cast<float>(retArg);
}
所以看起來問題可能出在 GetResponse?它的代碼在哪裏? – xaxxon
@xaxxon 現在已經加上了,不過我不明白除了計算時間不同之外它還會有什麼問題 – Azhar