2016-10-17 77 views
0

我試圖實現這個算法,但我一直卡住了半個月。我必須對指針有一個基本的誤解。任何幫助將不勝感激。從二進制森林中刪除根,並添加爲兒童

算法:建立查找樹

**Require**: list of features in root 
    **while** root contains feature vectors wtih common 1-bits **do** 
     f1,f2 <- features with most common 1-bits 
     p <- f1 **bitwise and** f2 
     set f1, f2 as children of p 
     remove f1, f2 from root 
     insert p into root 
    **end while** 

但願代碼將帶你通過我的思維過程的評論:

#include <iostream> 
#include <bitset> 
#include <list> 
#include <vector> 
#include <string> 
#include <sstream> 

const std::size_t HIP_VECTOR_SIZE = 9; 


struct DatabaseFeature { 
public: 
    DatabaseFeature(std::bitset<HIP_VECTOR_SIZE> &_HIP_vector, 
     DatabaseFeature *_left_child, 
     DatabaseFeature *_right_child, 
     std::string _name) : 
     HIP_vector(_HIP_vector), 
     left_child(_left_child), 
     right_child(_right_child), 
     name(_name) {} 

    std::bitset<HIP_VECTOR_SIZE> HIP_vector; 
    DatabaseFeature *left_child = nullptr; 
    DatabaseFeature *right_child = nullptr; 
    std::string name; 
private: 
}; 

//Create a string representing the binary tree recursively 
std::string serialize(DatabaseFeature *root, std::size_t depth=0) { 

    std::string output = ""; 
    if(root==nullptr) { 
    return " "; 
    } 

    output += serialize(root->right_child, depth+1); 
    output += "\n" + std::string(depth,'\t') + root->HIP_vector.to_string() + "--" + root->name; 
    output += serialize(root->left_child, depth+1); 

    return output; 
} 

int main(int argc, char* argv[]) { 

    std::list<DatabaseFeature> features; 

    //Load sample data 
    std::bitset<HIP_VECTOR_SIZE> F1("010100011"); 
    std::bitset<HIP_VECTOR_SIZE> F2("011010100"); 
    std::bitset<HIP_VECTOR_SIZE> F3("011010101"); 
    std::bitset<HIP_VECTOR_SIZE> F4("110010100"); 
    std::bitset<HIP_VECTOR_SIZE> F5("001100010"); 

    features.emplace_back(F1, nullptr, nullptr, "F1"); 
    features.emplace_back(F2, nullptr, nullptr, "F2"); 
    features.emplace_back(F3, nullptr, nullptr, "F3"); 
    features.emplace_back(F4, nullptr, nullptr, "F4"); 
    features.emplace_back(F5, nullptr, nullptr, "F5"); 


    std::vector<DatabaseFeature> non_roots; 
    std::size_t parent_count = 1; 

    //While root contains features with common 1-bits 
    while(1) { 

    std::size_t highest_common_bit_count = 0; 
    std::list<DatabaseFeature>::iterator feature_iter1; 
    std::list<DatabaseFeature>::iterator feature_iter2; 
    std::bitset<HIP_VECTOR_SIZE> most_common_bits; 

    //Compare every feature to every other feature 
    for(auto feature1 = features.begin(); feature1 != features.end(); ++feature1) { 
     for(auto feature2 = std::next(feature1); feature2 != features.end(); ++feature2) { 

     auto common_bits = feature1->HIP_vector & feature2->HIP_vector; 

     //If a new max is found, save it 
     if(common_bits.count() > highest_common_bit_count) { 

      feature_iter1 = feature1; 
      feature_iter2 = feature2; 
      most_common_bits = common_bits; 
      highest_common_bit_count = common_bits.count(); 
     } 
     } 
    } 

    if(highest_common_bit_count == 0) { 
     std::cout << "No more root features with common bits. " 
       << "Number of root features after forest construction: " 
       << features.size() << std::endl; 
     break; 
    } 


    //Move nodes from roots to non-roots 
    non_roots.push_back(std::move(*feature_iter1)); 
    non_roots.push_back(std::move(*feature_iter2)); 


    //Take the last two nodes for assignment as children 
    auto left_child = std::prev(std::end(non_roots)); 
    auto right_child = std::prev(std::end(non_roots),2); 


    //Erase children from the root 
    features.erase(feature_iter1); 
    features.erase(feature_iter2); 

    std::ostringstream parent_name; 
    parent_name << "P" << parent_count; 

    //Add new parent to root 
    features.emplace_back(most_common_bits, &*left_child, &*right_child, parent_name.str()); 

    std::cout << "Non-Roots: " << std::endl; 
    for(auto & nr : non_roots) { 
     std::cout << nr.HIP_vector << std::endl; 
    } 
    std::cout << std::endl; 

    std::cout << "Serializing:" << std::endl; 
    for(auto it = features.begin(); it != features.end(); ++it) { 
     if(&*it) { 
     std::cout << serialize(&*it) << std::endl << std::endl; 
     } 
    } 

    parent_count++; 
    } 
} 

我收到段錯誤的大部分時間。在非根矢量中使用指向兒童的指針時發生錯誤。任何人都可以告訴我爲什麼會發生這些分段錯誤?謝謝。

+1

如果您收到賽格故障,使用調試器來找出它們正在發生。 – Gene

+0

感謝您的反饋。當父項功能作爲子項添加時,會發生分段錯誤。我知道他們在哪裏發生,但我不明白他們爲什麼會發生。 – socratic

+1

第二次從std :: prev(...)得到left_child,right_child時,它們是不好的。無效指針傳遞給構造函數,然後使用,從而導致您在稍後嘗試解除引用時崩潰。調試器:) –

回答

0

我解決我的問題,通過使用智能DatabaseFeature指針列表和矢量:

#include <iostream> 
#include <bitset> 
#include <list> 
#include <vector> 
#include <string> 
#include <memory> 
#include <sstream> 

const std::size_t HIP_VECTOR_SIZE = 9; 


struct DatabaseFeature { 

    DatabaseFeature(const std::bitset<HIP_VECTOR_SIZE> &_HIP_vector, 
        std::shared_ptr<DatabaseFeature> _left_child, 
        std::shared_ptr<DatabaseFeature> _right_child, 
        const std::string &_node_name) : 
        HIP_vector(_HIP_vector), 
        left_child(_left_child), 
        right_child(_right_child), 
        node_name(_node_name) {}; 

    std::bitset<HIP_VECTOR_SIZE> HIP_vector; 
    std::shared_ptr<DatabaseFeature> left_child = nullptr; 
    std::shared_ptr<DatabaseFeature> right_child = nullptr; 
    std::string node_name; 
}; 



//Create a string representation of the binary tree recursively 
std::string serialize(std::shared_ptr<DatabaseFeature> root, std::size_t depth = 0) { 

    std::string output = ""; 

    if(root==nullptr) { 

    return " "; 
    } 

    output += serialize(root->right_child, depth+1); 
    output += "\n" + std::string(depth,'\t') + root->HIP_vector.to_string() 
     + "--" + root->node_name; 
    output += serialize(root->left_child, depth+1); 

    return output; 
} 



int main(int argc, char* argv[]) { 

    std::list<std::shared_ptr<DatabaseFeature>> root_features; 


    std::bitset<9> f1("010100011"); 
    std::bitset<9> f2("011010100"); 
    std::bitset<9> f3("011010101"); 
    std::bitset<9> f4("110010100"); 
    std::bitset<9> f5("001100010"); 
    std::bitset<9> f6("001010101"); 
    std::bitset<9> f7("001110101"); 
    std::bitset<9> f8("111010101"); 
    std::bitset<9> f9("110100111"); 
    std::bitset<9> f0("000010111"); 

    root_features.push_back(std::make_shared<DatabaseFeature>(f1, nullptr, nullptr, "F1")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f2, nullptr, nullptr, "F2")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f3, nullptr, nullptr, "F3")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f4, nullptr, nullptr, "F4")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f5, nullptr, nullptr, "F5")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f6, nullptr, nullptr, "F6")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f7, nullptr, nullptr, "F7")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f8, nullptr, nullptr, "F8")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f9, nullptr, nullptr, "F9")); 
    root_features.push_back(std::make_shared<DatabaseFeature>(f0, nullptr, nullptr, "F0")); 


    std::vector<std::shared_ptr<DatabaseFeature>> non_root_features; 
    std::size_t parent_count = 1; 


    //While root contains features with common 1-bits 
    while(1) { 

    std::size_t highest_common_bit_count = 0; 
    std::list<std::shared_ptr<DatabaseFeature>>::iterator feature_iter1; 
    std::list<std::shared_ptr<DatabaseFeature>>::iterator feature_iter2; 
    std::bitset<HIP_VECTOR_SIZE> most_common_bits; 

    //Compare every feature to every other feature 
    for(auto feature1 = root_features.begin(); feature1 != root_features.end(); ++feature1) { 
     for(auto feature2 = std::next(feature1); feature2 != root_features.end(); ++feature2) { 

     auto common_bits = (*feature1)->HIP_vector & (*feature2)->HIP_vector; 

     if(common_bits.count() > highest_common_bit_count) { 

      feature_iter1 = feature1; 
      feature_iter2 = feature2; 
      most_common_bits = common_bits; 
      highest_common_bit_count = common_bits.count(); 
     } 
     } 
    } 

    if(highest_common_bit_count == 0) { 
     std::cout << "No more root features with common bits. " 
       << "Number of root features after forest construction: " 
       << root_features.size() << std::endl; 
     break; 
    } 


    //Move nodes from roots to non-roots 
    non_root_features.push_back(*feature_iter1); 
    non_root_features.push_back(*feature_iter2); 


    //Take the last two nodes for assignment as children 
    auto left_child = std::prev(std::end(non_root_features)); 
    auto right_child = std::prev(std::end(non_root_features),2); 


    //Erase children from the root 
    root_features.erase(feature_iter1); 
    root_features.erase(feature_iter2); 


    //Name the new node 
    std::ostringstream parent_name; 
    parent_name << "P" << parent_count; 


    //root_features.emplace_back(most_common_bits, &*left_child, &*right_child); 
    root_features.push_back(std::make_shared<DatabaseFeature>(
     most_common_bits, *left_child, *right_child, parent_name.str())); 


    std::cout << "Non root features: " << std::endl; 
    for(auto & non_root : non_root_features) { 
     std::cout << non_root->HIP_vector << std::endl; 
    } 
    std::cout << std::endl; 

    std::cout << "Binary forest: " << std::endl; 
    for(auto it = root_features.begin(); it != root_features.end(); ++it) { 
     if(*it) { 
     std::cout << serialize(*it) << std::endl << std::endl; 
     } 
    } 

    parent_count++; 
    } 
}