2016-05-01 58 views
2

我是比較新的編程C++。我正在實現一個像索引樹一樣的樹,使用unorderd_map來實現樹數據結構來存儲子節點。由於即時通訊工作與結構樹一樣,構造一個搜索方法是遞歸的,我也存儲節點的指針,所以我懷疑我可能有一種處理不好的內存問題。我遇到了分段錯誤。接下來是我的代碼和它的輸出。在遞歸函數中使用unordered_map unordered_set進行分段錯誤

#include <memory> 
#include <sstream> 
#include <unordered_map> 
#include <iostream> 
#include <string> 
#include <sqlite3.h> 
#include "aux_functions.cpp" 
#include <math.h> 

using namespace std; 


class TreeLikeIndex 
{ 
    public: 
    TreeLikeIndex(string attribute, string indices, int indices_count, short int is_leaf, unordered_map<string, TreeLikeIndex*> children); 
    TreeLikeIndex(string indices, int indices_count); 
    TreeLikeIndex(); 
    string search(unordered_map<string, string> *); 
    private: 
    string indices; 
    int indices_count; 
    short int is_leaf; 
    string attribute; 
    unordered_map<string, TreeLikeIndex*> children; 

}; 


string TreeLikeIndex::search(unordered_map<string, string> * _tuple) 
{ 
    if((*_tuple).empty() || this->is_leaf) return this->indices; 
    string att_val = (*_tuple)[this->attribute]; 
    (*_tuple).erase(this->attribute); 
    TreeLikeIndex * child_with_that_value = this->children[att_val]; 
    return (*child_with_that_value).search(_tuple); 
} 



class DecisionTreeLikeIndexer 
{ 

public: 
    DecisionTreeLikeIndexer(string, string, string); 
    int rebuild_index(); 
    TreeLikeIndex * get_index(); 

private: 
    TreeLikeIndex * build_index(unordered_set<string> attributes_list, int depth, string comma_separated_ids, int ids_list_count); 
    TreeLikeIndex * index; 
    string source_db_address; 
    string dest_folder_address; 
    time_t time_of_last_build; 
    unordered_set<string> columns_names; 
    string source_table_name; 
    unordered_set<string> temp_tables_names; 
    string id_column_name; 
    sqlite3 * source_db_connection; 
    int table_count; 
}; 

int DecisionTreeLikeIndexer::rebuild_index() 
{ 
    this->index = this->build_index(this->columns_names, 0, "", 0); 
    this->time_of_last_build = time(NULL); 
    return 0; 
} 

TreeLikeIndex * DecisionTreeLikeIndexer::get_index() 
{ 
    return this->index; 
} 

DecisionTreeLikeIndexer::DecisionTreeLikeIndexer(string source_db_address, string table_name, string dest_folder_address) 
{ 
    this->source_db_address = source_db_address; 
    this->dest_folder_address = dest_folder_address; 
    this->columns_names = Aux::get_column_names(source_db_address, table_name); 
    this->source_table_name = table_name; 
    this->id_column_name = "rowid"; 
    this->source_db_connection = Aux::get_db_connection(this->source_db_address); 

    // Getting count of this table 

    sqlite3_stmt* statement; 
    string query = "SELECT count(*) FROM " + this->source_table_name + ";"; 
    if(sqlite3_prepare(this->source_db_connection, query.c_str(), -1, &statement, 0) == SQLITE_OK) 
    { 
    int res = sqlite3_step(statement); 
    const unsigned char * count_char = sqlite3_column_text(statement,0); 
    if(res == SQLITE_ROW) 
    { 
     stringstream _temp; 
     _temp << count_char; 
     _temp >> this->table_count;  
    } 
    sqlite3_finalize(statement);  
    } 
    else 
    { 
    cout << "Error initializating Indexer (Getting initial table count): " << sqlite3_errmsg(this->source_db_connection) << endl; 
    } 

} 

TreeLikeIndex * DecisionTreeLikeIndexer::build_index(unordered_set<string> attributes_list, int depth, string comma_separated_ids, int ids_list_count) 
{ 

    if(attributes_list.size() <=1 || (depth > 0 && ids_list_count <= 1)) 
    { 
    Aux::tabs(depth); 
    cout << "Leaf at depth: " << depth << " Ids are: " << comma_separated_ids << " Ids count: " << ids_list_count << endl; 
    static TreeLikeIndex * node = new TreeLikeIndex((string)comma_separated_ids, (int)ids_list_count); 
    return node; 
    } 

    string source_table = this->source_table_name; 
    int count = this->table_count; 

    if(depth > 0) 
    { 
    while(1) 
    { 
    source_table = *Aux::get_random_list_of_strings(1).begin(); 
    if(this->temp_tables_names.insert(source_table).second) break; 
    } 

    const string create_temp_table_stmnt = "CREATE TEMP TABLE " + source_table + " AS SELECT * FROM " + this->source_table_name + " WHERE " + this->id_column_name + " IN(" + comma_separated_ids + ")"; 
    sqlite3_exec(this->source_db_connection, create_temp_table_stmnt.c_str(),Aux::sqlt_callback,0,NULL); 
    count = ids_list_count; 
    Aux::tabs(depth); 
    cout << "Not root node" << endl; 
    } 

    Aux::tabs(depth); 
    cout << "Source table is: " << source_table << " Table count is: " << count << endl; 
    Aux::tabs(depth); 
    cout << "Attributes list is: "; for_each(attributes_list.begin(), attributes_list.end(),[](string v){cout << v << " ";}); 
    cout << endl; 
    const double E = log2(count) ; 
    Aux::tabs(depth); 
    cout << "Entropy of node: " << E << endl; 
    string best_attribute; 
    double best_gain; 
    unordered_set<string> best_attribute_values; 

    for(string attr: attributes_list) 
    { 
    Aux::tabs(depth+1); 
cout << "Analysing attribute: " << attr << endl; 
const string get_at_count_values_query = "SELECT " + attr + ", count(" + attr + ") FROM " + source_table + " GROUP BY " + attr + ";"; 
    sqlite3_stmt * stmnt; 
    double weighted_entropy = 0; 
    unordered_set<string> this_att_values; 
    if(sqlite3_prepare(this->source_db_connection, get_at_count_values_query.c_str(), -1, &stmnt, 0) == SQLITE_OK) 
    { 
     for(;;) 
     { 


     int res = sqlite3_step(stmnt); 

     if(res == SQLITE_DONE || res==SQLITE_ERROR) 
     { 
     double gAti = E - weighted_entropy; 
     Aux::tabs(depth+1); 
     cout << "Finish computing WE for att: " << attr << " Gain is: " << gAti << endl; 
     if(gAti > best_gain) 
     { 
      Aux::tabs(depth+1); 
      cout << "Found attribute with better gain." << endl; 
      best_gain = gAti; 
      best_attribute = attr; 
      best_attribute_values.clear(); 

     Aux::tabs(depth+1); 
     for(string v:this_att_values) 
      { 
      best_attribute_values.insert(v); 
      } 
      cout << endl; 

      this_att_values.clear(); 
     } 
     sqlite3_finalize(stmnt); 
     //delete &res; 
     break; 
     } 
     if(res == SQLITE_ROW) 
     { 

     string val = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0))); 
     int vSize = sqlite3_column_int(stmnt,1); 
     Aux::tabs(depth+2); 
     this_att_values.insert(val); 
     double ratio = double(vSize)/double(count); 
     weighted_entropy += double(ratio) * double(log2(vSize)); 
     Aux::tabs(depth+2); 
     cout << "Processing value: " << val << " With vSize: " << vSize << " Current WE is: " << weighted_entropy << endl; 
     } 
    } 
    } 
} 

    Aux::tabs(depth); 
    cout << "Finish processing attributes list. Best attribute is: " <<  best_attribute << " Best gain is: " << best_gain << endl; 
    Aux::tabs(depth); 
    cout << "Best attribute values are: ";  for_each(best_attribute_values.begin(), best_attribute_values.end(), [](string v){cout << v << ",";}); cout << endl; 
    unordered_map<string, TreeLikeIndex *> children; 
    for(string val: best_attribute_values) 
    { 

    const string get_ids_of_bestatt_val = "SELECT rowid FROM " + source_table + " WHERE " + best_attribute + " = " + val + ";"; 
    int ids_count = 0; 
    sqlite3_stmt * stmnt; 
    string ids = ""; 
    bool first = 1; 
    int next_depth = depth + 1; 
    unordered_set<string> next_attributes_set; 

    for(string attr: attributes_list) if(attr != best_attribute) next_attributes_set.insert(attr); 

    if(sqlite3_prepare(this->source_db_connection, get_ids_of_bestatt_val.c_str(), -1, &stmnt,0) == SQLITE_OK) 
    { 
     for(;;) 
     { 
     int res = sqlite3_step(stmnt); 

     if(res == SQLITE_ROW) 
      { 
      string id = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0))); 
      if(!first) ids += "," + id; 
      else ids += id; 
      ids_count++; 

      } 

     if(res == SQLITE_DONE || res == SQLITE_ERROR) 
      { 
      Aux::tabs(depth+1); 
      cout << "Adding branch for val: " << val << endl; 
      Aux::tabs(depth+1); 
      cout << " Next attributes are: ";  for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";}); 
      cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl; 
      sqlite3_finalize(stmnt); 
      static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count); 
      pair<string, TreeLikeIndex*> child (val, temp_child); 
     children.insert(child); 
      } 
     } 
    } 
    } 

    Aux::tabs(depth); 
    cout << "Finish processing node, will return." << endl; 
    static TreeLikeIndex * no_leaf_node = new TreeLikeIndex(best_attribute, "all", count, 0, children); 
    return no_leaf_node; 

} 
} 

TreeLikeIndex::TreeLikeIndex(std::string attribute, std::string indices, int indices_count, short int is_leaf, unordered_map<std::string, TreeLikeIndex*> children) 
{ 
    this->attribute = attribute; 
    this->indices = indices; 
    this->is_leaf = is_leaf; 
    this->children = children; 
    this->children.clear(); 
    for(pair<string, TreeLikeIndex*> p: children) this->children.insert(p); 
    this->indices_count = indices_count; 
} 

TreeLikeIndex::TreeLikeIndex(string indices, int indices_count) 
{ 
    this->indices = indices; 
    this->indices_count = indices_count; 
    this->is_leaf = 1; 
} 

TreeLikeIndex::TreeLikeIndex() 
{ 
    this->indices = ""; 
    this->indices_count = 0; 
    this->is_leaf = 1; 
} 




int main() 
{ 

string source_db_address = "my_table"; 
string table_name = "b"; 
string dest_folder_address = "."; 

DecisionTreeLikeIndexer indexer(source_db_address, table_name, dest_folder_address); 
indexer.rebuild_index(); 
} 

,輸出是:

Source table is: b Table count is: 9 
Attributes list is: cant_n_dec cant_n_des cant_n_control 
Entropy of node: 3.16993 
    Analysing attribute: cant_n_dec 
        Processing value: 1 With vSize: 1 Current WE is: 0 
        Processing value: 2 With vSize: 4 Current WE is: 0.888889 
        Processing value: 3 With vSize: 2 Current WE is: 1.11111 
        Processing value: 4 With vSize: 1 Current WE is: 1.11111 
        Processing value: 5 With vSize: 1 Current WE is: 1.11111 
    Finish computing WE for att: cant_n_dec Gain is: 2.05881 
    Found attribute with better gain. 

    Analysing attribute: cant_n_des 
        Processing value: 1 With vSize: 2 Current WE is: 0.222222 
        Processing value: 2 With vSize: 4 Current WE is: 1.11111 
        Processing value: 3 With vSize: 2 Current WE is: 1.33333 
        Processing value: 5 With vSize: 1 Current WE is: 1.33333 
    Finish computing WE for att: cant_n_des Gain is: 1.83659 
    Analysing attribute: cant_n_control 
        Processing value: 1 With vSize: 2 Current WE is: 0.222222 
        Processing value: 2 With vSize: 3 Current WE is: 0.750543 
        Processing value: 3 With vSize: 3 Current WE is: 1.27886 
        Processing value: 5 With vSize: 1 Current WE is: 1.27886 
    Finish computing WE for att: cant_n_control Gain is: 1.89106 
Finish processing attributes list. Best attribute is: cant_n_dec Best gain is: 2.05881 
Best attribute values are: 1,2,3,4,5, 
    Adding branch for val: 1 
    Next attributes are: cant_n_control,cant_n_des, Depth is: 1 Ids are: 3 Ids count: 1 
    Leaf at depth: 1 Ids are: 3 Ids count: 1 
Segmentation fault 
+0

而且當您逐行調試您的代碼時,您的調試器會根據調試程序導致分段故障,以及堆棧跟蹤是什麼樣的? –

+0

我使用打印(cout <<)作爲調試技術。程序執行從第一個(初始)一步進入遞歸上下文,並返回一個指針(指向葉節點)。這個指針要在使用insert的第一個(初始)上下文中存儲到unordered_map中。就像這樣:T * obj = recursive_call(); some_pair(「key」,obj); uMap.insert(some_pair);有崩潰的地方。 –

+0

把額外的打印語句完全不適用於所有,但最簡單的錯誤。學習和了解如何使用調試器是每個C++開發人員必備的技能。調試器提供的信息比基本的打印語句有用得多。放下這個程序,花一些時間學習使用調試器。 –

回答

0

我不是舒爾但....

我覺得這個問題可以在下面的循環

for(;;) 
    { 
    int res = sqlite3_step(stmnt); 

    if(res == SQLITE_ROW) 
     { 
     string id = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0))); 
     if(!first) ids += "," + id; 
     else ids += id; 
     ids_count++; 

     } 

    if(res == SQLITE_DONE || res == SQLITE_ERROR) 
     { 
     Aux::tabs(depth+1); 
     cout << "Adding branch for val: " << val << endl; 
     Aux::tabs(depth+1); 
     cout << " Next attributes are: ";  for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";}); 
     cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl; 
     sqlite3_finalize(stmnt); 
     static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count); 
     pair<string, TreeLikeIndex*> child (val, temp_child); 
    children.insert(child); 
     } 
    } 

我不明白什麼時候終止(在for(;;)沒有退出條件,沒有return和沒有break在塊中)。

我懷疑澈分割故障是由下面的指令引起

int res = sqlite3_step(stmnt); 

的情況下,SQLITE_DONESQLITE_ERROR情況下(蒙山調用

sqlite3_finalize(stmnt); 

)之後,循環被重複再次,stmnt無效。

以下是可以解決的問題嗎?

if (sqlite3_prepare(this->source_db_connection, get_ids_of_bestatt_val.c_str(), -1, &stmnt,0) == SQLITE_OK) 
{ 
    while (sqlite3_step(stmnt) == SQLITE_ROW) 
    { 
     ids += (first ? "" : ",") 
     + std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0))); 
     ids_count++; 
    } 

    Aux::tabs(depth+1); 
    cout << "Adding branch for val: " << val << endl; 
    Aux::tabs(depth+1); 
    cout << " Next attributes are: "; 
    for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";}); 
    cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl; 
    sqlite3_finalize(stmnt); 
    static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count); 
    pair<string, TreeLikeIndex*> child (val, temp_child); 
    children.insert(child); 
} 
+0

現在它的工作,非常感謝,不知道我怎麼沒有看到。 –