我試圖把這個 curl 函數放進我的類別中,但在 CURLOPT_WRITEFUNCTION 上遇到了問題。反覆編譯嘗試後,仍然沒有找到解決辦法。我也試過 Stack Overflow 上的一些做法(例如「CURLOPT_WRITEFUNCTION 指向成員函數的指標」),但都沒有用。
以下是我嘗試過的寫法(用來取代下面程式碼中的 'writer'):
node::writer、&node::writer、std::bind1st(std::mem_fun(&node::writer), this);
這裏是我的代碼:
#ifndef NODE_H_
#define NODE_H_
/*
 * Appends size * nmemb bytes from data to *buffer and returns that byte
 * count (0 when buffer is NULL). Defined at the end of this header;
 * declared here so node::curlHttpget can use it for libcurl's write callback.
 */
int writer(char *data, std::size_t size, std::size_t nmemb, std::string *buffer);
/*
* function prototypes
*/
/*
 * node: one crawled page -- its url, the downloaded source, and the
 * heading / anchor tags collected from that source.
 */
class node {
private:
    // record type for a stored html element; its definition follows the class
    struct tag;

    // address of the page and the markup stored for it
    std::string url;
    std::string source;

    // collected tag records, one vector per element kind
    std::vector<tag> heading;
    std::vector<tag> anchor;

    // fetch the page at url with curl; returns the body, or "" on failure
    std::string curlHttpget(const std::string &url);

    // append one tag record to the matching vector (heading / anchor)
    void add_heading(std::string text, std::string properties);
    void add_anchor(std::string text, std::string properties);

public:
    // empty node: nothing is loaded or scraped
    node() {}
    // load the seed page, then scrape its anchors and headings
    node(std::string seed);

    // destructor: no explicit cleanup is performed
    ~node() {}

    // store seed as the url and download its source
    void load(std::string seed);

    // anchor tags: scrape them out of source / print the stored ones
    void get_anchors();
    void display_anchors();

    // heading tags: scrape them out of source / print the stored ones
    void get_headings();
    void display_headings();
};
/*
 * Record kept for every stored html element: the element's text plus a
 * second string ("properties") supplied by whoever creates the record.
 */
struct node::tag {
    std::string text;
    std::string properties;

    // copies t into text and p into properties
    tag(std::string t, std::string p)
        : text(t),
          properties(p) {
    }
};
/*
 * constructors
 */
// Builds a node from a seed url: downloads the page first, then scrapes
// its anchors and its headings, in that order.
node::node(std::string seed) {
    this->load(seed);
    this->get_anchors();
    this->get_headings();
}
/*
* araneus::subroutines
*/
// crawl the page
void node::load(std::string seed) {
url = seed;
source = curlHttpget(url);
}
//scrape html source
std::string node::curlHttpget(const std::string &url) {
std::string buffer;
CURL *curl;
CURLcode result;
curl = curl_easy_init();
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_HEADER, 0);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writer);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
result = curl_easy_perform(curl);//http get performed
curl_easy_cleanup(curl);//must cleanup
//error codes: http://curl.haxx.se/libcurl/c/libcurl-errors.html
if (result == CURLE_OK) {
return buffer;
}
//curl_easy_strerror was added in libcurl 7.12.0
std::cerr << "error: " << result << " " << curl_easy_strerror(result) << std::endl;
return "";
}
std::cerr << "error: could not initalize curl" << std::endl;
return "";
}
void node::get_headings() {
static const regex expression("<[hH][1-6]\\s*(?<properties>.*?)\\s*>(?<name>.*?)</\\s*[hH][1-6]\\s*>");
int const subMatches[] = { 1, 2 };
sregex_token_iterator p(source.begin(), source.end(), expression, subMatches);
sregex_token_iterator end;
string text;
string properties;
int count = 0;
for (;p != end; count++, ++p)
{
string m(p->first, p->second);
if(count % 2) {
text = m;
add_heading(text, properties);
}
else {
properties = m;
}
}
}
//use regex to find anchors in source
void node::get_anchors() {
static const regex expression("<[a|A].*?[href|HREF]\\s*=[\"\"'](?<url>.*?)[\"\"'].*?>(?<name>.*?)</[a|A]>");
static const regex relative("^\\/");
static const regex firstChar("^[A-Za-z0-9\\-_\\$\\.\\+!\\*'\\(\\)#]"); // valid url characters
static const regex protocol("^[http:\\/\\/|HTTP:\\/\\/|https:\\/\\/|HTTPS:\\/\\/|ftp:\\/\\/|FTP:\\/\\/|sftp:\\/\\/|SFTP:\\/\\/]");
int const subMatches[] = { 1, 2 };
sregex_token_iterator p(source.begin(), source.end(), expression, subMatches);
sregex_token_iterator end;
string text, properties;
int count = 0;
for (; p != end; count++, ++p) {
std::string m(p->first, p->second);
if(count % 2) {
text = m;
add_anchor(text, properties);
}
else {
if(regex_search(m, relative)) { //if link is in "/somewhere" format
properties = url + m;
}
else if(regex_search(m, protocol)) { //if link is absolute "http://www.somewhere.com"
properties = m;
}
else if(regex_search(m, firstChar)) { //if link starts with a valid url char "somewhere.html"
properties = url + "/" + m;
}
else {
std::cout << "link of unknown protocol: " << m << std::endl;
}
}
}
}
void node::add_heading(std::string text, std::string properties) {
heading.push_back(tag(text, properties));
}
void node::display_headings() {
for(int i = 0; i < (int)heading.size(); i++) {
std::cout<< "[h]: " << heading[i].text << endl;
std::cout<< "[h.properties]: " << heading[i].properties << endl;
}
cout << "found " << (int)heading.size() << " <h[1-6]> tags" << endl;
}
void node::add_anchor(std::string text, std::string properties) {
anchor.push_back(tag(text, properties));
}
void node::display_anchors() {
for(int i = 0; i < (int)anchor.size(); i++) {
std::cout<< "[a]: " << anchor[i].text << endl;
std::cout<< "[a.properties]: " << anchor[i].properties << endl;
}
cout << "found " << (int)anchor.size() << " <a> tags" << endl;
}
// required by libcurl
// Appends the size * nmemb bytes starting at data to *buffer.
// Returns the number of bytes appended (as int), or 0 when buffer is NULL,
// in which case nothing is written.
int writer(char *data, std::size_t size, std::size_t nmemb, std::string *buffer) {
    if (buffer == NULL) {
        return 0;
    }
    const std::size_t byteCount = size * nmemb;
    buffer->append(data, byteCount);
    return static_cast<int>(byteCount);
}
#endif /* NODE_H_ */
我想找到一個辦法,把函數「int writer」改成成員函數「int node::writer」。問題出現在 std::string node::curlHttpget 中設定 CURLOPT_WRITEFUNCTION 的地方。
使用 &node::writer 可以通過編譯,但執行時會發生段錯誤(segfault)=/
感謝