2010-11-19 271 views
2

我試圖把這個 curl 回調函數放進我的類中,但在 CURLOPT_WRITEFUNCTION 上遇到了問題,一直找不到能通過編譯的解決方案。我也嘗試了 Stack Overflow 上的一些做法,但都不可行。標題:讓 CURLOPT_WRITEFUNCTION 使用指向成員函數的指針

以下是我嘗試過的寫法(用來替換下面代碼中的 'writer'):

node::writer、&node::writer、std::bind1st(std::mem_fun(&node::writer), this);

這裏是我的代碼:

#ifndef NODE_H_ 
#define NODE_H_ 

// Forward declaration of the libcurl write callback that is defined near the
// end of this header, so node::curlHttpget can refer to it before its definition.
int writer(char *data, std::size_t size, std::size_t nmemb, std::string *buffer); 

/* 
* function prototypes 
*/ 

/**
 * One crawled web page.
 *
 * Holds the page address, the HTML downloaded from it, and the heading and
 * anchor elements scraped out of that HTML.
 */
class node {
    /// Record describing one scraped html element; defined after this class.
    struct tag;

    /// Address of the page, as handed to load().
    std::string url;
    /// HTML returned by curlHttpget() for that address.
    std::string source;

    /// Elements collected by get_headings() / get_anchors().
    std::vector<tag> heading;
    std::vector<tag> anchor;

    /// Download the document at `url` with libcurl; yields "" on failure.
    std::string curlHttpget(const std::string &url);

    /// Append one element to `heading` / `anchor` respectively.
    void add_heading(std::string text, std::string properties);
    void add_anchor(std::string text, std::string properties);

public:
    /// Build an empty node; nothing is downloaded.
    node() {}

    /// Build a node from `seed`: load the page, then scrape anchors and headings.
    node(std::string seed);

    ~node() {}

    /// Store `seed` as the page address and download its source.
    void load(std::string seed);

    /// Scrape the <a> elements out of the downloaded source.
    void get_anchors();
    /// Print every stored anchor followed by a total count.
    void display_anchors();

    /// Scrape the <h1>..<h6> elements out of the downloaded source.
    void get_headings();
    /// Print every stored heading followed by a total count.
    void display_headings();
};
/*
 * Record kept for every scraped html element.
 *
 * For headings, `text` is the element content and `properties` the attribute
 * text of the opening tag; for anchors, `properties` carries the link target.
 */
struct node::tag {
    std::string text;
    std::string properties;

    // Copies both strings into the record.
    tag(std::string t, std::string p)
        : text(t),
          properties(p) {
    }
};

/*
 * Seeded constructor: download the page at `seed`, then scrape its anchors
 * first and its headings second.
 */
node::node(std::string seed) {
    this->load(seed);
    this->get_anchors();
    this->get_headings();
}
/* 
* araneus::subroutines 
*/ 

// crawl the page 
void node::load(std::string seed) { 
url = seed; 
source = curlHttpget(url); 
} 


//scrape html source 
std::string node::curlHttpget(const std::string &url) { 
std::string buffer; 

CURL *curl; 
CURLcode result; 

curl = curl_easy_init(); 

if (curl) { 
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); 
    curl_easy_setopt(curl, CURLOPT_HEADER, 0); 
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writer); 
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer); 

    result = curl_easy_perform(curl);//http get performed 

    curl_easy_cleanup(curl);//must cleanup 

    //error codes: http://curl.haxx.se/libcurl/c/libcurl-errors.html 
    if (result == CURLE_OK) { 
    return buffer; 
    } 
    //curl_easy_strerror was added in libcurl 7.12.0 
    std::cerr << "error: " << result << " " << curl_easy_strerror(result) << std::endl; 
    return ""; 
} 

std::cerr << "error: could not initalize curl" << std::endl; 
return ""; 
} 

void node::get_headings() { 
static const regex expression("<[hH][1-6]\\s*(?<properties>.*?)\\s*>(?<name>.*?)</\\s*[hH][1-6]\\s*>"); 

int const subMatches[] = { 1, 2 }; 

sregex_token_iterator p(source.begin(), source.end(), expression, subMatches); 
sregex_token_iterator end; 

string text; 
string properties; 

int count = 0; 
for (;p != end; count++, ++p) 
{ 
    string m(p->first, p->second); 

    if(count % 2) { 
    text = m; 
    add_heading(text, properties); 
    } 
    else { 
    properties = m; 
    } 
} 
} 

/*
 * Scrape <a href="..."> elements out of `source` and store them via add_anchor().
 *
 * The token iterator yields, for every match, capture 1 (the href value)
 * followed by capture 2 (the link text). The href is resolved first:
 *   - "/somewhere"             -> url + href
 *   - "http://..." and similar -> href unchanged
 *   - "somewhere.html"         -> url + "/" + href
 * Anything else is reported on std::cout and the anchor is skipped, so it is
 * never stored with the URL left over from the previous anchor.
 *
 * The patterns use groups and alternation rather than character classes:
 * "[a|A]" and "[href|HREF]" each match one character out of the listed set,
 * not the words, and the same holds for a bracketed list of protocols.
 */
void node::get_anchors() {
    static const regex expression("<[aA]\\s(?:[^>]*?\\s)?(?:href|HREF)\\s*=\\s*[\"'](?<url>.*?)[\"'][^>]*>(?<name>.*?)</[aA]>");
    static const regex relative("^\\/");
    static const regex firstChar("^[A-Za-z0-9\\-_\\$\\.\\+!\\*'\\(\\)#]"); // valid url characters
    static const regex protocol("^(?:http|HTTP|https|HTTPS|ftp|FTP|sftp|SFTP):\\/\\/");

    int const subMatches[] = { 1, 2 };

    sregex_token_iterator p(source.begin(), source.end(), expression, subMatches);
    sregex_token_iterator end;

    std::string properties;
    bool resolved = false; // whether the href of the current anchor was understood

    int count = 0;
    for (; p != end; count++, ++p) {
        std::string m(p->first, p->second);

        if (count % 2) {
            // Second token of the pair: the link text.
            if (resolved) {
                add_anchor(m, properties);
            }
        }
        else {
            // First token of the pair: the href value.
            resolved = true;
            if (regex_search(m, relative)) { // link is in "/somewhere" format
                properties = url + m;
            }
            else if (regex_search(m, protocol)) { // link is absolute "http://www.somewhere.com"
                properties = m;
            }
            else if (regex_search(m, firstChar)) { // link starts with a valid url char "somewhere.html"
                properties = url + "/" + m;
            }
            else {
                std::cout << "link of unknown protocol: " << m << std::endl;
                resolved = false;
            }
        }
    }
}

void node::add_heading(std::string text, std::string properties) { 
heading.push_back(tag(text, properties)); 
} 

void node::display_headings() { 
for(int i = 0; i < (int)heading.size(); i++) { 
    std::cout<< "[h]: " << heading[i].text << endl; 
    std::cout<< "[h.properties]: " << heading[i].properties << endl; 
} 
cout << "found " << (int)heading.size() << " <h[1-6]> tags" << endl; 
} 

void node::add_anchor(std::string text, std::string properties) { 
anchor.push_back(tag(text, properties)); 
} 

void node::display_anchors() { 
for(int i = 0; i < (int)anchor.size(); i++) { 
    std::cout<< "[a]: " << anchor[i].text << endl; 
    std::cout<< "[a.properties]: " << anchor[i].properties << endl; 
} 
cout << "found " << (int)anchor.size() << " <a> tags" << endl; 
} 

/*
 * Write callback used with libcurl: append one received chunk to `buffer`.
 *
 * @param data    start of the received chunk
 * @param size    size of one element of the chunk
 * @param nmemb   number of elements in the chunk
 * @param buffer  string that accumulates the chunks; may be null
 * @return        size * nmemb (the number of bytes appended), or 0 when
 *                `buffer` is null and nothing was stored
 */
int writer(char *data, std::size_t size, std::size_t nmemb, std::string *buffer) {
    if (buffer == NULL) {
        return 0;
    }

    const std::size_t byteCount = size * nmemb;
    buffer->append(data, byteCount);
    return static_cast<int>(byteCount);
}

#endif /* NODE_H_ */ 

我想找到一個解決方案,把函數「int writer」改成成員函數「int node::writer」。問題出現在 std::string node::curlHttpget 中設置 CURLOPT_WRITEFUNCTION 的地方。

使用 &node::writer 可以通過編譯,但運行時會發生段錯誤(segfault)=/

感謝

回答

1

不要用 std::string* 作爲參數,而是改用 node*;或者使用另一個類似 HttpGet 的類,其中包含一個 std::string 和一個指向你的 node 的指針,這樣回調既可以寫入字符串,又可以在每次調用時訪問你的 node。

boost::bind 不適用於 C API 的回調。

它之所以能通過編譯,是因爲 curl_easy_setopt 使用了可變參數(...),因此完全沒有類型安全可言。你可以傳入任何類型的參數,它都能編譯;但運行時很可能出錯,正如你付出代價後所發現的那樣。

爲了獲得額外的類型安全,我會讓你的函數具有與 curl_write_callback 完全相同的簽名,也就是以 void* 作爲第四個參數,並在函數實現內部進行類型轉換(cast)。