2013-11-28 167 views
1

我想從我的一家商品經銷商的網站上抓取每個產品的庫存數量,因為他們不知道如何導出這些數據。所以我想知道是否有人能告訴我,如何使用 PHP 抓取一個必須先登錄才能獲取數據的網站?(抓取網站,安全登錄)

我嘗試過下面的腳本,但它不起作用。請哪位幫忙檢查一下。

// Log in to the distributor site with a form POST, keep the session cookies
// in a local cookie jar, then fetch one product page with the same session.

// Login form payload; the e-mail address is URL-encoded, the remaining
// fields are sent exactly as the login form submits them.
$postData = "email_address=".urlencode("[email protected]")."&password=18&x=24&y=11";
$cookie = 'cookies.txt';
$timeout = 30;

$ch = curl_init();

// Configure the handle completely BEFORE the first curl_exec(); executing
// earlier would only print the login page and send no credentials.
curl_setopt($ch, CURLOPT_URL, "https://www.wonatrading.com/login.php");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
// The connect timeout must be shorter than the overall timeout, otherwise the
// overall limit fires first and the connect limit has no effect.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
// Read and write the same cookie file so the session survives between requests.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie);

// Step 1: submit the login form.
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);

if (curl_exec($ch) === false) {
    echo 'Login request failed: '.curl_error($ch);
} else {
    // Step 2: switch the handle back to GET before requesting the product
    // page; otherwise the login payload would be POSTed to it again.
    curl_setopt($ch, CURLOPT_HTTPGET, 1);
    curl_setopt($ch, CURLOPT_URL, "http://www.wonatrading.com/product_info.php?products_id=213754&kind=2&cPath=172_185");
    echo curl_exec($ch);
}

curl_close($ch);

回答

2

好的,我把這個分享給你——這是我一直用來抓取網頁的類,請隨意使用。

<?php 

/**
 * Minimal cURL-based scraper that logs in through an HTML form and keeps the
 * session in a cookie file.
 *
 * Usage: construct with the account credentials, then call login().
 */
class Scrape
{
    /** Path of the cookie file used both to read and to store cookies. */
    public $cookies = 'cookies.txt';
    private $user = null;
    private $pass = null;

    /* Data generated from cURL */
    /** Body returned by the most recent request (false when curl_exec failed). */
    public $content = null;
    /** Result of curl_getinfo() for the most recent request. */
    public $response = null;

    /* Links */
    private $url = array(
        'login'  => 'https://www.wonatrading.com/account.php',
        'submit' => 'https://www.wonatrading.com/login.php?action=process'
    );

    /* Fields */
    public $data = array();

    /**
     * @param string $user Value sent as the form's email_address field.
     * @param string $pass Value sent as the form's password field.
     */
    public function __construct($user, $pass)
    {
        $this->user = $user;
        $this->pass = $pass;
    }

    /**
     * Fetches the login page, fills the form named "login" with the stored
     * credentials and posts it to the submit URL.
     *
     * The body of the last response is echoed and the script is terminated
     * in every case: after the submit when the form was found, or right
     * after the login-page fetch when it was not.
     */
    public function login()
    {
        $this->cURL($this->url['login']);

        $form = $this->getFormFields($this->content, 'login');
        if ($form) {
            // Keep every field scraped from the form and override only the
            // two credential fields.
            $form['email_address'] = $this->user;
            $form['password'] = $this->pass;
            $this->cURL($this->url['submit'], $form);
        }

        echo $this->content;
        exit;
    }

    /**
     * Scans markup for a <form> whose name attribute starts with $id.
     *
     * @param string $data HTML to search.
     * @param string $id   Form name to look for; matched literally.
     * @return array|false Map of input name => value, or false when no such
     *                     form is present.
     */
    private function getFormFields($data, $id)
    {
        // preg_quote() keeps regex metacharacters in $id from changing the pattern.
        $pattern = '/(<form.*?name=.?'.preg_quote($id, '/').'.*?<\/form>)/is';

        if (!preg_match($pattern, $data, $matches)) {
            return false;
        }

        return $this->getInputs($matches[1]);
    }

    /**
     * Collects the <input> elements of a form fragment.
     *
     * @param string $form HTML of a single form.
     * @return array Map of input name => value; an input without a value
     *               attribute maps to an empty string.
     */
    private function getInputs($form)
    {
        $inputs = array();

        if (!preg_match_all('/(<input[^>]+>)/is', $form, $matches)) {
            return $inputs;
        }

        foreach ($matches[1] as $tag) {
            // Collapse whitespace runs inside the tag before matching attributes.
            $tag = preg_replace('/\s{2,}/', ' ', $tag);

            if (!preg_match('/name=(?:["\'])?([^"\'\s]*)/i', $tag, $nameMatch)) {
                continue; // Inputs without a name are not submitted.
            }

            // Use a dedicated match variable: preg_match() overwrites its
            // output argument with an empty array on a failed match, which
            // would otherwise replace the '' default.
            $value = '';
            if (preg_match('/value=(?:["\'])?([^"\']*)/i', $tag, $valueMatch)) {
                $value = $valueMatch[1];
            }

            $inputs[$nameMatch[1]] = $value;
        }

        return $inputs;
    }

    /**
     * Performs a request against $url and stores the outcome on the object:
     * the body in $content, curl_getinfo() in $response and the effective
     * URL (after redirects) in $url['last_url'].
     *
     * @param string      $url  Address to request.
     * @param array|false $post Form fields to send as a POST body; a falsy
     *                          value leaves the request as a GET.
     */
    public function cURL($url, $post = false)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): certificate and host verification are switched off,
        // so the credentials travel over a connection that is not
        // authenticated. Kept as-is for compatibility; enable both options
        // once a CA bundle is available.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_VERBOSE, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookies);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookies);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
        curl_setopt($ch, CURLOPT_TIMEOUT, 120);

        if ($post) {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post));
        }

        $this->content = curl_exec($ch);
        $this->response = curl_getinfo($ch);
        $this->url['last_url'] = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        curl_close($ch);
    }
}


// Build the scraper with the account credentials and run the login flow.
$scraper = new Scrape('[email protected]', '18UlG');
$scraper->login();


?> 
+0

它還提示 IP 已被封鎖,我該如何繞過? – user2793506

+0

明白了。我應該說明一點:如果已經保存了 cookie,那麼它就處於登錄狀態;否則腳本無法工作。 – user2793506

+0

我試著運行了你的代碼,是密碼不正確嗎? – Ramz