2013-04-10 78 views
0

我試圖獲取網頁的HTML源代碼,然後用它來獲取Javascript標記中的數據。用於獲取Javascript標記中的數據的HTML源代碼

頁面上的JavaScript代碼是這樣的:

<script> 
fullplayer("player", { 
      src:"full_width_player.swf", 
      wmode:"window" 
     }, { 
    key: '#$0c4de1874473849ff8a', 
    canvas: { 
     backgroundGradient: "none", 
     backgroundColor: "#000000" 
    }, 
    audio: { 
     provider: 'servstat', 
     q: '128' 
    }, 
    playlist: '/get.php?location=/audio/welcome.mp3', 
    plugins: { 
     youtube: { } 
    } 
}); 
</script> 

我正在尋找一種方式來獲得內部playlist:'*****',值,這意味着音軌的位置 - /get.php?location=/audio/welcome.mp3

這可以用 HTML DOM 解析器做到嗎?還是我需要抓取 <script> 標籤內的全部內容,再把數據當作 XML 或類似的格式來讀取?

回答

1

試試這個正則表達式:

// Capture the quoted value that follows "playlist:"; $Match[1] holds the bare value.
// Inside a character class "|" is a literal character, not alternation, so the
// class lists only the two quote characters. \s* tolerates any whitespace
// (spaces, tabs, newlines) between the colon and the opening quote.
preg_match_all('~playlist:\s*[\'"](.*?)[\'"]~si',$HTML,$Match); 
print_r($Match); 

輸出爲您的樣品:

Array 
(
    [0] => Array 
    (
     [0] => playlist: '/get.php?location=/audio/welcome.mp3' 
    ) 

    [1] => Array 
    (
     [0] => /get.php?location=/audio/welcome.mp3 
    ) 

) 

cURL

/**
 * Parse raw HTTP header text into an array of header "parts".
 *
 * Each input line is split at its first ":" into a name and a value. The value
 * is everything after the colon, unmodified (a leading space is kept). A line
 * without a colon (for example a status line) is stored with the whole line as
 * both key and value. A line whose name and value are both empty (a blank line,
 * or a lone ":") starts the next part; a part index only appears in the result
 * once a line has been stored in it.
 *
 * When a name occurs more than once inside one part, its entry becomes a list
 * of all values in order of appearance.
 *
 * @param string|array $response Header text (when $String == 1) or an array of
 *                               header lines (any other $String).
 * @param mixed        $Run      Optional callable invoked as $Run($name, $value)
 *                               for every line, separators included.
 * @param int          $String   1 to split $response on "\r\n" first.
 * @return array Part index => (name => value, or name => list of values).
 */
function HeaderProc($response,$Run="",$String=1/*[Is 1 IF Use for String Mode ]*/){
    if ($String == 1) {
        $response = explode("\r\n", $response);
    }

    $PartHeader = 0;
    $out = array($PartHeader => array());

    // foreach instead of each(): each() was removed in PHP 8.0.
    foreach ($response as $val) {
        $val = (string)$val;

        // Lines passed in array mode may still carry their CRLF terminator;
        // drop it (and anything after it) before splitting.
        $eol = strpos($val, "\r\n");
        if ($eol !== false) {
            $val = (string)substr($val, 0, $eol);
        }

        $colon = strpos($val, ':');
        if ($colon === false) {
            // No colon: the line itself is both the key and the stored value.
            $name   = $val;
            $value  = '';
            $stored = $name;
        } else {
            $name   = (string)substr($val, 0, $colon);
            $value  = (string)substr($val, $colon + 1);
            $stored = $value;
        }

        if ($name === '' && $value === '') {
            // Blank line (or a lone ":"): what follows belongs to the next part.
            $PartHeader++;
        } elseif (!isset($out[$PartHeader][$name])) {
            $out[$PartHeader][$name] = $stored;
        } elseif (is_array($out[$PartHeader][$name])) {
            $out[$PartHeader][$name][] = $stored;
        } else {
            // Second occurrence: promote the scalar to a list.
            $out[$PartHeader][$name] = array($out[$PartHeader][$name], $stored);
        }

        if (!empty($Run)) {
            $Run($name, $value);
        }
    }

    return $out;
}

/**
 * Small wrapper around PHP's curl extension for GET and POST requests.
 *
 * Both request methods return an array with these keys:
 *   'Header'     - result of HeaderProc() on the raw response headers; where a
 *                  part's 'Set-Cookie' entry is a list, each item is replaced
 *                  by CookieAnalysis() of that item. A single Set-Cookie header
 *                  is left as the raw string HeaderProc() produced.
 *   'Body'       - response body (string).
 *   'HTTP_State' - HTTP status code reported by curl.
 *   'URL'        - effective URL after redirects.
 *   'Error'      - curl error message, or '' when the transfer succeeded.
 */
class cURL {
    public $headers = array();
    public $user_agent;
    public $compression;
    public $cookie_file;
    public $proxy;
    public $Cookie;
    public $cookies = false;

    /**
     * Extract the first "name=value" pair from a cookie string.
     *
     * Later attributes (path, expires, ...) are ignored. The result is also
     * kept in $this->Cookie, replacing the previous one.
     *
     * @param string $Cookie Raw cookie string, e.g. a Set-Cookie header value.
     * @return array array(name => value), or an empty array when no pair is found.
     */
    function CookieAnalysis($Cookie){//convert str cookie to array cookie
        $this->Cookie = array();
        // Padding with ' ' and '; ' guarantees a terminating ';' for the pattern.
        if (preg_match("~(.*?)=(.*?);~si", ' '.$Cookie.'; ', $M)) {
            $this->Cookie[trim($M[1])] = trim($M[2]);
        }
        return $this->Cookie;
    }

    /**
     * @param bool   $cookies     Whether to use a cookie file for requests.
     * @param string $cookie      Path of the cookie file (created if missing).
     * @param string $compression Value passed to CURLOPT_ENCODING.
     * @param string $proxy       Proxy passed to CURLOPT_PROXY; '' for none.
     */
    function __construct($cookies=false,$cookie='cookies.txt',$compression='gzip',$proxy='') {
        // No Accept-Encoding entry here: CURLOPT_ENCODING makes curl send the
        // header for encodings it can actually decode. A hand-written header
        // advertising "sdch" could yield a body curl is unable to decompress.
        $this->headers = array(
            'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset:ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Language:en-US,en;q=0.8',
            'Cache-Control:max-age=0',
            'Connection:keep-alive',
        );
        // CURLOPT_USERAGENT takes the header value only, without "User-Agent:".
        $this->user_agent = 'Mozilla/5.0 (SepidarSoft [Organic Search Engine Crawler] Linux Edition) AppleWebKit/536.5 (KHTML, like Gecko) SepidarBrowser/1.0.100.52 Safari/536.5';
        $this->compression = $compression;
        $this->proxy = $proxy;
        $this->cookies = $cookies;
        if ($this->cookies == TRUE) {
            $this->cookie($cookie);
        }
    }

    /**
     * Select the cookie file, creating an empty one when it does not exist.
     * Calls error() (which terminates the script) if the file cannot be created.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    function cookie($cookie_file) {
        if (!file_exists($cookie_file)) {
            $handle = fopen($cookie_file, 'w');
            if ($handle === false) {
                $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
            }
            // Close the handle itself; the file only needed to be created.
            fclose($handle);
        }
        $this->cookie_file = $cookie_file;
    }

    /**
     * Perform a GET request.
     *
     * @param string $url
     * @return array See the class description for the keys.
     */
    function GET($url) {
        return $this->request($url, false, null);
    }

    /**
     * Perform a POST request.
     *
     * @param string       $url
     * @param string|array $data Passed unchanged to CURLOPT_POSTFIELDS.
     * @return array See the class description for the keys.
     */
    function POST($url,$data) {
        return $this->request($url, true, $data);
    }

    /**
     * Shared implementation of GET() and POST().
     *
     * @param string $url
     * @param bool   $isPost Whether to send $data as a POST body.
     * @param mixed  $data   POST fields; ignored unless $isPost is true.
     * @return array
     */
    private function request($url, $isPost, $data) {
        $process = curl_init($url);
        curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($process, CURLOPT_HEADER, 1);
        curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies == TRUE) {
            curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }
        curl_setopt($process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($process, CURLOPT_TIMEOUT, 30);
        if ($this->proxy) {
            curl_setopt($process, CURLOPT_PROXY, $this->proxy);
        }
        if ($isPost) {
            curl_setopt($process, CURLOPT_POSTFIELDS, $data);
        }
        curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
        if ($isPost) {
            curl_setopt($process, CURLOPT_POST, 1);
        }

        $response = curl_exec($process);
        $error = '';
        if ($response === false) {
            // Transfer failed: report the reason and continue with an empty
            // response so the result keeps its usual shape.
            $error = curl_error($process);
            $response = '';
        }

        // With CURLOPT_HEADER on, the headers precede the body in $response.
        $header_size = curl_getinfo($process, CURLINFO_HEADER_SIZE);

        $result = array();
        $result['Header'] = HeaderProc((string)substr($response, 0, $header_size), '', 1);
        foreach ($result['Header'] as $HeaderK => $HeaderP) {
            if (!isset($HeaderP['Set-Cookie']) || !is_array($HeaderP['Set-Cookie'])) {
                continue;
            }
            foreach ($HeaderP['Set-Cookie'] as $key => $val) {
                $result['Header'][$HeaderK]['Set-Cookie'][$key] = $this->CookieAnalysis($val);
            }
        }
        $result['Body'] = (string)substr($response, $header_size);
        $result['HTTP_State'] = curl_getinfo($process, CURLINFO_HTTP_CODE);
        $result['URL'] = curl_getinfo($process, CURLINFO_EFFECTIVE_URL);
        $result['Error'] = $error;
        curl_close($process);
        return $result;
    }

    /**
     * Print an HTML error box and terminate the script.
     *
     * @param string $error Message to display.
     */
    function error($error) {
        echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }
}

樣本:

$cc = new cURL(); 
    // Fetch the page; presumably this URL should be the page containing the player script.
    $Data=$cc->GET('http://www.yahoo.com'); 
    // $Data['Body'] is the response body as a string, which is what preg_match_all() expects.
    // Inside a character class "|" is a literal, so only the two quote characters are listed.
    preg_match_all('~playlist:\s*[\'"](.*?)[\'"]~si',$Data['Body'],$Match); 
    print_r($Match); 
+0

嗨,我正在使用 curl 來獲取 HTML 源代碼,有什麼辦法可以把 curl 抓取到的源代碼傳給你的代碼嗎?我試過了,但它給我報錯。 – naveencgr8 2013-04-10 15:27:31

+0

我把 curl 的輸出傳給了你的代碼,但它給了我一個錯誤:警告:preg_match_all() 期望參數 2 是字符串,但給出的是資源(resource) – naveencgr8 2013-04-10 15:28:19

+0

$HTML 應該是一個字符串。等一下,我把 cURL 的代碼加上 – 2013-04-10 15:29:35