0
我正試圖從維基百科下載數據。我發送了一個GET請求,但返回的狀態是「HTTP 301重定向」。現在我需要解析響應以獲取目標網址,並從那裏訪問文章內容。我已經完成了解析,但仍然無法訪問文章內容。如果您能幫我取得文章內容,我將不勝感激。
標題:從維基百科頁面下載數據
代碼:
#include <winsock2.h>
#include <WS2tcpip.h>
#include <windows.h>
#include <cstdlib>
#include <iostream>
#include <ostream>
#include <string>
#include <vector>
using namespace std;
/* Returns true when `line` begins with `prefix`, comparing ASCII letters
   case-insensitively (HTTP header field names are case-insensitive). */
static bool has_prefix_icase(const std::string& line, const std::string& prefix) {
    if (line.size() < prefix.size()) {
        return false;
    }
    for (std::string::size_type i = 0; i < prefix.size(); ++i) {
        char a = line[i];
        char b = prefix[i];
        if (a >= 'A' && a <= 'Z') { a = static_cast<char>(a - 'A' + 'a'); }
        if (b >= 'A' && b <= 'Z') { b = static_cast<char>(b - 'A' + 'a'); }
        if (a != b) {
            return false;
        }
    }
    return true;
}

/* Fetches /wiki/<query> from en.wikipedia.org over plain HTTP (port 80),
   prints the complete raw response, splits it into header lines and body,
   and reports the redirect target when the server answers with a 3xx status.

   Returns 0 on success and -1 when any Winsock step fails. */
int main(){
    const std::string query = "Google";

    // Initialize Dependencies to the Windows Socket.
    WSADATA wsaData;
    if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
        std::cout << "WSAStartup failed.\n";
        system("pause");
        return -1; /* nothing below can work without Winsock */
    }

    struct addrinfo hints;
    ZeroMemory(&hints, sizeof(hints));
    hints.ai_family = AF_INET;
    hints.ai_protocol = IPPROTO_TCP;
    hints.ai_socktype = SOCK_STREAM;

    /* resolve the wikipedia host name */
    static const char wiki_host[] = "en.wikipedia.org";
    struct addrinfo* targetAdressInfo = NULL;
    const int getAddrRes = getaddrinfo(wiki_host, NULL, &hints, &targetAdressInfo);
    if (getAddrRes != 0 || targetAdressInfo == NULL)
    {
        std::cout << "Could not resolve the Host Name" << std::endl;
        system("pause");
        WSACleanup();
        return -1;
    }

    SOCKADDR_IN sockAddr;
    ZeroMemory(&sockAddr, sizeof(sockAddr)); /* also clears the sin_zero padding */
    sockAddr.sin_addr = ((struct sockaddr_in*) targetAdressInfo->ai_addr)->sin_addr;
    sockAddr.sin_family = AF_INET;
    sockAddr.sin_port = htons(80);
    freeaddrinfo(targetAdressInfo);

    SOCKET webSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (webSocket == INVALID_SOCKET)
    {
        std::cout << "Creation of the Socket Failed" << std::endl;
        system("pause");
        WSACleanup();
        return -1;
    }
    if (connect(webSocket, (SOCKADDR*)&sockAddr, sizeof(sockAddr)) != 0)
    {
        std::cout << "Could not connect";
        system("pause");
        closesocket(webSocket);
        WSACleanup();
        return -1;
    }

    /* Build the request. "Connection: close" makes the server close the
       connection after the response, which is what terminates the
       read-until-EOF loop below; without it an HTTP/1.1 server keeps the
       connection alive and recv() blocks. The header section ends with
       exactly one empty line. */
    std::string http_query = "GET /wiki/" + query + " HTTP/1.1\r\n";
    http_query += std::string("Host: ") + wiki_host + "\r\n";
    http_query += "Connection: close\r\n";
    http_query += "\r\n";

    /* send() may accept fewer bytes than requested, so loop until all are sent */
    std::string::size_type bytes_sent = 0;
    while (bytes_sent < http_query.size()) {
        const int sent_now = send(webSocket, http_query.data() + bytes_sent,
                                  static_cast<int>(http_query.size() - bytes_sent), 0);
        if (sent_now == SOCKET_ERROR) {
            std::cout << "Could not send the request to the Server" << std::endl;
            system("pause");
            closesocket(webSocket);
            WSACleanup();
            return -1;
        }
        bytes_sent += static_cast<std::string::size_type>(sent_now);
    }

    /* read the whole response until the server closes the connection */
    std::string response;
    char recv_buffer[4096];
    for (;;) {
        const int bytes_read = recv(webSocket, recv_buffer,
                                    static_cast<int>(sizeof(recv_buffer)), 0);
        if (bytes_read == 0) { /* orderly shutdown by the peer */
            break;
        }
        if (bytes_read == SOCKET_ERROR) {
            std::cout << "Could not receive the response from the Server" << std::endl;
            system("pause");
            closesocket(webSocket);
            WSACleanup();
            return -1;
        }
        /* append by length so embedded NUL bytes do not truncate the data */
        response.append(recv_buffer, static_cast<std::string::size_type>(bytes_read));
    }

    /* finished with the socket */
    closesocket(webSocket);
    WSACleanup();

    /* parse the http response headers */
    std::string::size_type cursor = 0;
    std::string response_content;
    std::vector<std::string> response_headers;
    const std::string::size_type headers_end = response.find("\r\n\r\n");
    while (true) {
        const std::string::size_type line_end = response.find("\r\n", cursor);
        if (line_end == std::string::npos) { /* probably due to http error */
            break;
        }
        response_headers.push_back(response.substr(cursor, line_end - cursor));
        if (line_end == headers_end) { /* found content */
            response_content = response.substr(headers_end + 4); /* skip \r\n\r\n */
            break;
        }
        cursor = line_end + 2; /* skip \r\n */
    }

    /* print the complete response (the string's length, not sizeof the object) */
    std::cout << response << std::endl;
    std::cout << "[" << response_headers.size() << " header lines, "
              << response_content.size() << " content bytes]" << std::endl;

    /* A 3xx status line means the article is not in this response; the
       target is carried in the Location header. */
    if (!response_headers.empty()) {
        const std::string& status_line = response_headers.front();
        const std::string::size_type space = status_line.find(' ');
        const bool is_redirect = space != std::string::npos
                              && space + 1 < status_line.size()
                              && status_line[space + 1] == '3';
        if (is_redirect) {
            const std::string location_name = "location:";
            for (std::vector<std::string>::size_type i = 1; i < response_headers.size(); ++i) {
                const std::string& header = response_headers[i];
                if (!has_prefix_icase(header, location_name)) {
                    continue;
                }
                std::string::size_type value_start =
                    header.find_first_not_of(" \t", location_name.size());
                if (value_start == std::string::npos) {
                    value_start = header.size();
                }
                const std::string target = header.substr(value_start);
                std::cout << "Redirected to: " << target << std::endl;
                if (target.compare(0, 8, "https://") == 0) {
                    /* this client speaks plain TCP only */
                    std::cout << "The target requires HTTPS (TLS), which a plain socket "
                                 "cannot speak; use WinHTTP/WinInet or libcurl to follow it."
                              << std::endl;
                }
                break;
            }
        }
    }

    system("pause");
    return 0;
}
不要嘗試自己實現HTTP,請改用[Windows WinInet庫](https://msdn.microsoft.com/en-us/library/windows/desktop/aa383630%28v=vs.85%29.aspx)或[cURL庫](http://curl.haxx.se/libcurl/)。從網絡服務器獲取「頁面」時,它會讓事情簡單得多。 –
我建議你使用像curl這樣的高級HTTP客戶端庫,而不是手寫一個。這更容易,更不容易出錯。除了你想學習手寫一個。 – leemes
很優秀的API,但我想學習自己手寫(: –