我已經分配了一個任務來從一個受密碼保護的站點中取出數據,我通過CURL做了,但是現在我想要獲得由CURL返回的HTML內的鏈接,鏈接並從那裏抓取數據。我將CURL
的回覆傳遞給file_get_contents()
,但無法正常工作。這是我的CURL
代碼。結合CURL和簡單的HTML DOM來報廢數據
$ckfile = tempnam("/tmp", "CURLCOOKIE");
$useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.3 Safari/533.2';
$username = "XXXXXX";
$password = "XXXXXX";
$f = fopen('log.txt', 'w'); // file to write request header for debug purpose
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_COOKIEJAR, $ckfile);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
$html = curl_exec($ch);
curl_close($ch);
preg_match('~<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="(.*?)" />~', $html, $viewstate);
preg_match('~<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="(.*?)" />~', $html, $eventValidation);
$viewstate = $viewstate[1];
$eventValidation = $eventValidation[1];
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, false);
curl_setopt($ch, CURLOPT_COOKIEJAR, $ckfile);
curl_setopt($ch, CURLOPT_COOKIEFILE, $ckfile);
curl_setopt($ch, CURLOPT_HEADER, FALSE);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_REFERER, $url);
curl_setopt($ch, CURLOPT_VERBOSE, 1);
curl_setopt($ch, CURLOPT_STDERR, $f);
curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
// Collecting all POST fields
$postfields = array();
$postfields['__EVENTTARGET'] = "";
$postfields['__EVENTARGUMENT'] = "";
$postfields['__VIEWSTATE'] = $viewstate;
$postfields['__EVENTVALIDATION'] = $eventValidation;
$postfields['ctl00$LoginPopup1$Login1$UserName'] = $username;
$postfields['ctl00$LoginPopup1$Login1$Password'] = $password;
$postfields['ctl00$LoginPopup1$Login1$LoginButton'] = 'Log In';
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postfields);
$ret = curl_exec($ch); // Get result after login page.
下面是簡單的HTML DOM代碼
$html = file_get_contents($ret);
這是錯誤我得到
Warning: file_get_contents(1): failed to open stream: No such file or directory
任何其他建議如何做到這一點可以理解。感謝
$ ret將是來自curl_exec的響應,並且不會是文件名或位置,如果您輸出$ ret,您會得到什麼?你想要curl請求的響應的HTML嗎? – MajorCaiger
$ ret給我1的迴應。 –