2010-03-19 32 views
0

我得到以下forumulae計算該二進制餘弦變係數

SIM = |Q∩D| /√| Q |√| D |

我去AHED並實施一類比較由一系列詞語

#pragma once 

#include <vector> 
#include <string> 
#include <iostream> 
#include <vector> 

using namespace std; 

class StringSet 
{ 
public: 
StringSet(void); 
StringSet(const string the_strings[], const int no_of_strings); 
~StringSet(void); 
StringSet(const vector<string> the_strings); 
void add_string(const string the_string); 
bool remove_string(const string the_string); 
void clear_set(void); 
int no_of_strings(void) const; 
friend ostream& operator <<(ostream& outs, StringSet& the_strings); 
friend StringSet operator *(const StringSet& first, const StringSet& second); 
friend StringSet operator +(const StringSet& first, const StringSet& second); 
double binary_coefficient(const StringSet& the_second_set); 

private: 
vector<string> set; 
}; 

#include "StdAfx.h" 
#include "StringSet.h" 
#include <iterator> 
#include <algorithm> 
#include <stdexcept> 
#include <iostream> 
#include <cmath> 


StringSet::StringSet(void) 
{ 
} 

StringSet::~StringSet(void) 
{ 
} 

StringSet::StringSet(const vector<string> the_strings) 
{ 
set = the_strings; 
} 

StringSet::StringSet(const string the_strings[], const int no_of_strings) 
{ 
copy(the_strings, &the_strings[no_of_strings], back_inserter(set)); 
} 

void StringSet::add_string(const string the_string) 
{ 
try 
{ 
    if(find(set.begin(), set.end(), the_string) == set.end()) 
    { 
    set.push_back(the_string); 
    } 
    else 
    { 
    //String is already in the set. 
    throw domain_error("String is already in the set"); 
    } 
} 
catch(domain_error e) 
{ 
    cout << e.what(); 
    exit(1); 
} 

} 

bool StringSet::remove_string(const string the_string) 
{ 
//Found the occurrence of the string. return it an iterator pointing to it. 
vector<string>::iterator iter; 
if((iter = find(set.begin(), set.end(), the_string)) != set.end()) 
{ 
    set.erase(iter); 
    return true; 
} 
return false; 
} 
void StringSet::clear_set(void) 
{ 
set.clear(); 
} 

int StringSet::no_of_strings(void) const 
{ 
return set.size(); 
} 

ostream& operator <<(ostream& outs, StringSet& the_strings) 
{ 
vector<string>::const_iterator const_iter = the_strings.set.begin(); 
for(; const_iter != the_strings.set.end(); const_iter++) 
{ 
    cout << *const_iter << " "; 
} 
cout << endl; 
return outs; 
} 

//This function returns the union of the two string sets. 

StringSet operator *(const StringSet& first, const StringSet& second) 
{ 
vector<string> new_string_set; 
new_string_set = first.set; 
for(unsigned int i = 0; i < second.set.size(); i++) 
{ 
    vector<string>::const_iterator const_iter = find(new_string_set.begin(), new_string_set.end(), second.set[i]); 
    //String is new - include it. 
    if(const_iter == new_string_set.end()) 
    { 
    new_string_set.push_back(second.set[i]); 
    } 
} 
StringSet the_set(new_string_set); 
return the_set; 
} 
//This method returns the intersection of the two string sets. 

StringSet operator +(const StringSet& first, const StringSet& second) 
{ 
//For each string in the first string look though the second and see if 
//there is a matching pair, in which case include the string in the set. 
vector<string> new_string_set; 
vector<string>::const_iterator const_iter = first.set.begin(); 
for (; const_iter != first.set.end(); ++const_iter) 
{ 
    //Then search through the entire second string to see if 
    //there is a duplicate. 
    vector<string>::const_iterator const_iter2 = second.set.begin(); 
    for(; const_iter2 != second.set.end(); const_iter2++) 
    { 
    if(*const_iter == *const_iter2) 
    { 
    new_string_set.push_back(*const_iter); 
    } 
    } 
} 
StringSet new_set(new_string_set); 
return new_set; 

} 

double StringSet::binary_coefficient(const StringSet& the_second_set) 
{ 
double coefficient; 
StringSet intersection = the_second_set + set; 

coefficient = intersection.no_of_strings()/sqrt((double) no_of_strings()) * sqrt((double)the_second_set.no_of_strings()); 
return coefficient; 
} 

的字符串然而,當我嘗試使用以下主要功能計算係數:

// Exercise13.cpp : main project file. 

#include "stdafx.h" 
#include <boost/regex.hpp> 
#include "StringSet.h" 

using namespace System; 
using namespace System::Runtime::InteropServices; 

using namespace boost; 

//This function takes as input a string, which 
//is then broken down into a series of words 
//where the punctuaction is ignored. 



StringSet break_string(const string the_string) 
{ 
regex re; 
cmatch matches; 
StringSet words; 
string search_pattern = "\\b(\\w)+\\b"; 

try 
{ 
    // Assign the regular expression for parsing. 
    re = search_pattern; 
} 
catch(regex_error& e) 
{ 
    cout << search_pattern << " is not a valid regular expression: \"" 
    << e.what() << "\"" << endl; 
    exit(1); 
} 

sregex_token_iterator p(the_string.begin(), the_string.end(), re, 0); 
sregex_token_iterator end; 
for(; p != end; ++p) 
{ 
    string new_string(p->first, p->second); 
    String^ copy_han = gcnew String(new_string.c_str()); 
    String^ copy_han2 = copy_han->ToLower(); 
    char* str2 = (char*)(void*)Marshal::StringToHGlobalAnsi(copy_han2); 
    string new_string2(str2); 
    words.add_string(new_string2); 
} 


return words; 


} 

int main(array<System::String ^> ^args) 
{ 
StringSet words = break_string("Here is a string, with some; words"); 
StringSet words2 = break_string("There is another string,"); 

cout << words.binary_coefficient(words2); 
    return 0; 
} 

我得到指數是1.5116而不是從0到1的值。

有沒有人知道爲什麼會出現這種情況?

任何幫助,將不勝感激。

+0

如果你想在Visual Studio中使用它,你將不得不創建一個Windows CLI控制檯應用程序,因爲在我瘋狂的時候我決定使用一些。 – hairyyak 2010-03-19 16:19:18

+0

如果您的問題已解決,請使用複選標記使其正式(並考慮爲您的以前的問題考慮),接受最有幫助的答案。 – 2010-03-19 18:05:53

回答

0

也許這只是一個優先事項

coefficient = intersection.no_of_strings()/sqrt((double) no_of_strings()) * sqrt((double)the_second_set.no_of_strings()); 

沒有指定你必須先乘,然後分。他們的優先級是相同的,但我不知道choosen行爲..你嘗試將其指定:

coefficient = intersection.no_of_strings()/(sqrt((double) no_of_strings()) * sqrt((double)the_second_set.no_of_strings())); 
2

您需要在最後的計算更多的括號。 a/b * c被解析爲(a/b) * c,但您想要a/(b * c)

+0

啊,你是一個真正的 - 我從來沒有發現過。 – hairyyak 2010-03-19 16:25:45

+0

我花了整整一天在網上搜索,因爲我認爲我不懂公式。 – hairyyak 2010-03-19 16:27:41

+0

@hairyyak:如果它適合你,請接受答案。提升和接受答案可以幫助SO社區的其他成員。 – 2016-05-20 23:10:01