2013-12-17 38 views
0

我試圖登錄到站點，然後轉到其中一個頁面並檢索數據，但遇到了授權問題。示例代碼（使用 PHP 和 cURL 登錄並下載內容）：

// Target of the login form's "action" attribute.
$loginUrl = 'https://strona.pl/authorization';

// Login form field names mapped to the values to submit.
$loginFields = array(
    'username' => '[email protected]',
    'password' => 'haslo',
);

// Page to download once the login request has been made.
$remotePageUrl = 'https://strona.pl/pdstrona';

// Submit the login form first, then request the target page.
$login = getUrl($loginUrl, 'post', $loginFields);
$remotePage = getUrl($remotePageUrl);

/**
 * Fetch a URL with cURL, reading and writing cookies in 'cookies/cookies.txt'
 * so cookies set by one call (e.g. a login) are sent by later calls.
 *
 * @param string       $url    URL to request.
 * @param string       $method Pass 'post' to send a POST request; any other
 *                             value leaves cURL's default request method.
 * @param array|string $vars   POST payload handed to CURLOPT_POSTFIELDS
 *                             (only used when $method is 'post').
 *
 * @return string|false The response body, or false if the transfer failed
 *                      (the cURL error message is echoed in that case).
 */
function getUrl($url, $method='', $vars='') {
    $ch = curl_init();
    if ($method == 'post') {
        curl_setopt($ch, CURLOPT_POST, 1);
        // NOTE: when $vars is an array cURL sends it as multipart/form-data;
        // pass an http_build_query() string for a urlencoded form body.
        curl_setopt($ch, CURLOPT_POSTFIELDS, $vars);
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookies/cookies.txt');
    curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookies/cookies.txt');

    // Execute the transfer exactly once. The previous code called curl_exec()
    // a second time just to test for failure, which repeated every request
    // (including the login POST).
    $buffer = curl_exec($ch);

    // If an error occurred, display it.
    if ($buffer === false) {
        echo 'Curl error: ' . curl_error($ch);
    }

    // Closing the handle is what writes the cookie jar to disk.
    curl_close($ch);
    return $buffer;
}
// Suppress the display of libxml errors raised by loadHTML.
libxml_use_internal_errors(true);

$dom = new DOMDocument();
$dom->strictErrorChecking = false;
$dom->loadHTML($remotePage);

// XPath helper bound to the parsed document.
$xpath = new DOMXPath($dom);

此查詢的結果是我被重定向到主頁（未登錄狀態）。需要說明的是，我在腳本中輸入的電子郵件地址是經過 URL 編碼的，例如：email%40mojmail.pl

編輯

好 - 我會添加一些信息。我是一名出版商並與afilo.pl合作。每天檢查費率是非常累人的,所以我想編寫一個腳本,每天收集一次數據並通知我這些變化。

不幸的是我無法檢索數據。從我的瀏覽器中取得的 cookie：Set-Cookie: PHPSESSID=jat102p33s0pmfairri1qiih24; expires=Thu, 25-Dec-2013 21:16:40 GMT; path=/; domain=.afilo.pl

我修改了代碼,但它仍然不起作用。

// Target of the login form's "action" attribute.
// (The "$" sigil was missing here, which made the script fail to parse.)
$loginUrl = 'https://opentrack.afilo.pl/logowanie';

// Login form field names and values.
$loginFields = array('loginemail'=>'.......','loginhaslo'=>'........');

// URL of the page you want to save.
$remotePageUrl = 'https://opentrack.afilo.pl/partner/programy-lista';

// Log in to the site.
$login = getUrl($loginUrl, 'post', $loginFields);

// Get the remote page.
$remotePage = getUrl($remotePageUrl);

/**
 * Fetch a URL with cURL, forwarding the session cookie of the current PHP
 * request (if the client sent one) as the Cookie header of the outgoing call.
 *
 * Response headers are included in the returned string (CURLOPT_HEADER).
 *
 * @param string       $url    URL to request.
 * @param string       $method Pass 'post' to send a POST request; any other
 *                             value leaves cURL's default request method.
 * @param array|string $vars   POST payload handed to CURLOPT_POSTFIELDS
 *                             (only used when $method is 'post').
 *
 * @return string|false Response headers and body, or false if the transfer
 *                      failed (the cURL error message is echoed in that case).
 */
function getUrl($url, $method='', $vars='') {
    $ch = curl_init();
    if ($method == 'post') {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $vars);
    }

    // Look up the session cookie sent with the current request. It may be
    // absent (e.g. first visit or CLI run), so guard the array access instead
    // of triggering an undefined-index notice.
    session_start();
    $sessionName = session_name();
    $sessionId = isset($_COOKIE[$sessionName]) ? $_COOKIE[$sessionName] : '';
    session_write_close();

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    //curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookies/cookies.txt');
    //curl_setopt($ch, CURLOPT_COOKIEFILE, 'cookies/cookies.txt');
    if ($sessionId !== '') {
        // CURLOPT_COOKIE is the raw "Cookie:" request header, so it must hold
        // name=value pairs only. "path" and "domain" are Set-Cookie response
        // attributes and would be transmitted as two extra bogus cookies.
        curl_setopt($ch, CURLOPT_COOKIE, $sessionName . '=' . $sessionId);
    }

    // Execute the transfer exactly once. The previous code called curl_exec()
    // a second time just to test for failure, which repeated every request.
    $buffer = curl_exec($ch);

    // If an error occurred, display it.
    if ($buffer === false) {
        echo 'Curl error: ' . curl_error($ch);
    }

    curl_close($ch);
    return $buffer;
}
+2

如果網站要你以編程方式訪問數據,他們會提供API – 2013-12-17 21:00:06

+0

抱歉:)我在標題中犯了一個錯誤。我試圖簡單地登錄並轉到該子頁面,然後下載數據。頁面不提供API。我注意到我的代碼在使用郵件地址作爲登錄名的頁面上不起作用。 – user3112825

回答

0

這是我的問題的答案。此代碼運行良好。

// Log in to a site with cURL and then download a page that requires the
// authenticated session. One cURL handle is reused for both requests and the
// cookies are persisted in $cookie_file_path.

// options
$EMAIL   = 'login';
$PASSWORD   = 'haslo';
// NOTE(review): this is an absolute path from the filesystem root; confirm the
// directory exists and is writable, otherwise no cookies are saved.
$cookie_file_path = "/cookies/cookies.txt";
$LOGINURL   = "https://domainname.com/auth";
$agent   = "Nokia-Communicator-WWW-Browser/2.0 (Geos 3.0 Nokia-9000i)";


$ch = curl_init();

// Start from a fresh list so headers from an earlier $headers variable cannot
// leak into this request.
$headers = array(
    "Accept: */*",
    "Connection: Keep-Alive",
);

curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_HEADER, 0);
// Keep TLS verification enabled: the request carries a password, and turning
// verification off lets a man-in-the-middle read it. If verification fails,
// fix the CA bundle (CURLOPT_CAINFO) rather than disabling the check.
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_USERAGENT, $agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file_path);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file_path);


// Login form field names and values.
$fields = array();
$fields['loginemail'] = $EMAIL;
$fields['loginhaslo'] = $PASSWORD;

// Encode as application/x-www-form-urlencoded; passing the array itself would
// make cURL send multipart/form-data instead.
$POSTFIELDS = http_build_query($fields);

// Step 1: submit the login form.
curl_setopt($ch, CURLOPT_URL, $LOGINURL);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $POSTFIELDS);

$result = curl_exec($ch);

if ($result === false) {
    echo 'Curl error: ' . curl_error($ch);
} else {
    // Step 2: fetch the protected page. The handle is still configured for
    // POST, so switch it back to GET; otherwise the credentials would be
    // posted again to this URL.
    $remotePageUrl = 'https://domainname.com/partner/programy-lista';
    curl_setopt($ch, CURLOPT_HTTPGET, true);
    curl_setopt($ch, CURLOPT_URL, $remotePageUrl);
    $result = curl_exec($ch);

    if ($result === false) {
        echo 'Curl error: ' . curl_error($ch);
    }
}

// Closing the handle is what writes the cookie jar to disk.
curl_close($ch);