2012-05-28 41 views
1

我從我的公司站點獲取了此HTML代碼。由於我無法訪問數據庫,因此我想要解析HTML文件並返回值。該代碼是這樣的:解析HTML文件並返回值作爲php變量

<?php 
$string = ' 
<p> <b>HEADER INFO</b> 
<table width=100% cellspacing=0> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>View Object:</b> 6600422</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>BPO:</b> G37147359-000000</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Ack Date:</b> 2012-05-28</font></td> 
    </tr> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=3><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Operation(s):</b> PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End</font></td> 
    </tr> 
</table> 
</p> 
<hr> 
<p> <b>EXTERNAL ORDER NUMBER REFERENCE</b> 
<table width=100% cellspacing=0> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>SAP Sales Order Number</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Customer P.O. Number</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Legacy Order Number</b></font></td> 
    </tr> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">0310363858</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">77340892008-120413</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">89FF09378001</font></td> 
    </tr> 
</table> 
</p> 
<hr> 
<p> <b>PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)</b> 
<table width=100% cellspacing=0> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>PL</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Product #</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Qty</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Options</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Serial #</b></font></td> 
    </tr> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">3C</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">AP703B</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">1</font></td> 
    <td valign=top colspan=1>&nbsp </td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">2S6219000G</font></td> 
    </tr> 
</table> 
</p> 
<hr> 
<p> <b>Station Info</b> 
<table width=100% cellspacing=0> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Start Station:</b> JPN_End</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Location:</b> Done</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Station:</b> </font></td> 
    </tr> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Birth Date/Time:</b> 2012-05-23 14:20:32 SGT</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Power Cord:</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Voltage:</b></font></td> 
    </tr> 
</table> 
</p> 
<hr> 
<p> <b>MATERIAL LIST FOR THIS WORK OBJECT/OPERATION(S)</b> 
<table width=100% cellspacing=0> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Part Number</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Qty</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Description</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>BB Type</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Material Location</b></font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\"><b>Serial Number</b></font></td> 
    </tr> 
    <tr align=left> 
    <td width=2% colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">&nbsp;&nbsp;</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">[email protected]@</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">1</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">OEM Generic 1U SAS Enclosure</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">BOM</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">ASSY</font></td> 
    <td valign=top colspan=1><font face=\"verdana, arial, helvetica\" size=\"-2\">2S6219000G</font></td> 
    </tr> 
</table> 
</p> 
'; 

// Parse the report, then import every returned field as a variable of the same name.
$result = parse_data($string);
extract($result);

// Print the six fields in a fixed order, each one followed by an HTML line break.
echo implode('<br />', array($headertext, $sapSON, $custPON, $legacyON, $pl, $pn)), '<br />';


/**
 * Extract the heading and the first five digit-led table cells from an HTML report.
 *
 * Cells are visited in document order. Only cells whose trimmed text starts
 * with a digit are kept, and the kept cells are assigned positionally to the
 * returned keys, so a cell that starts with a letter is skipped rather than
 * assigned to the next key.
 *
 * @param string $string HTML markup containing <p> headings and <td> cells.
 * @return array Keys: headertext, sapSON, custPON, legacyON, pl, pn.
 *               Every key is always present; a value that was not found is null.
 */
function parse_data($string){
    // Spacer cells hold nothing but a pair of &nbsp; entities; dropping the pair leaves them empty.
    $string = str_replace('&nbsp;&nbsp;','',$string);

    $header = null;
    $values = array();

    // DOMDocument::loadHTML() refuses an empty string (a ValueError on PHP 8), so blank input is not parsed.
    if (trim($string) !== '') {
        // Collect libxml's complaints about the legacy markup internally instead of silencing them with "@".
        $previous = libxml_use_internal_errors(true);

        $xml = new DOMDocument();
        $xml->loadHTML($string);

        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // Every <p> overwrites the previous one, so the last paragraph found supplies the heading.
        foreach ($xml->getElementsByTagName('p') as $p) {
            $header = trim($p->nodeValue);
        }

        foreach ($xml->getElementsByTagName('td') as $td) {
            $value = trim($td->nodeValue);
            // Compare with '' instead of empty(): empty('0') is true and would drop a legitimate "0" cell.
            // $value[0] replaces the $value{0} offset syntax, which PHP 8 no longer accepts.
            if ($value !== '' && is_numeric($value[0])) {
                $values[] = $value;
            }
        }
    }

    // Pad so that every key can be filled even when fewer than five cells matched.
    $values = array_pad($values, 5, null);

    return array(
        'headertext' => $header,
        'sapSON'     => $values[0],
        'custPON'    => $values[1],
        'legacyON'   => $values[2],
        'pl'         => $values[3],
        'pn'         => $values[4],
    );
}
?> 

現在我想把標題「EXTERNAL ORDER NUMBER REFERENCE」保存到一個變量中,以便之後調用。

此外,第一行的第二、第三和第四列(標題)分別對應第二行的第二、第三和第四列的值;我也想將這些值保存到變量中。所以基本上我需要一個PHP腳本來解析這個HTML文件並返回以下內容:

$header1 = "HEADER INFO"; 
$viewObject = "6600422"; 
$BPO = "G37147359-000000"; 
$AckDate = "2012-05-28"; 
$Operations = "PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End"; 
$header2 = "EXTERNAL ORDER NUMBER REFERENCE"; 
$sapSON = "0310363858"; 
$custPON = "77340892008-120413"; 
$legacyON = "89FF09378001"; 
$header3 = "PRODUCTS FOR THIS WORK OBJECT/OPERATION(S)";
$pl = "3C"; 
$pn = "AP703B"; 
$qty = "1"; 
$options = "&nbsp;"; 
$serialNo = "2S6219000G"; 

等......基本上,我需要所有表格內容保存到變量中,因爲我稍後將它們保存到我的數據庫並創建報告並生成一些細節的條形碼

感謝您的幫助!

僅供參考:我無法訪問數據庫,因此我所能做的就是解析這個HTML文件並將這些值保存到可以存儲到數據庫中的變量中。另外,請注意標題是不變的,唯一改變的值是針對不同順序的數字。

+0

參見[如何用PHP解析和處理HTML?](http://stackoverflow.com/q/3577641/1396314) – flowfree

+0

貴公司的網站?你應該告訴他們'font'標籤已被棄用。通過使用css,你可以計算你節省了多少個字節的帶寬。 –

+0

@ChristianVarga,我不知道從哪裏開始,因爲我剛剛開始PHP。 – JudeJitsu

回答

2

這裏試試這個,See it in action

<?php 
$string = '<p> <b>EXTERNAL ORDER NUMBER REFERENCE</b> 
    <table width=100% cellspacing=0> 
     <tr align=left> 
     <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td> 
     <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>SAP Sales Order Number</b></font></td> 
     <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Customer P.O. Number</b></font></td> 
     <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2"><b>Legacy Order Number</b></font></td> 
     </tr> 
     <tr align=left> 
     <td width=2% colspan=1><font face="verdana, arial, helvetica" size="-2">&nbsp;&nbsp;</font></td> 
     <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">0310363858</font></td> 
     <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">77340892008-120413</font></td> 
     <td valign=top colspan=1><font face="verdana, arial, helvetica" size="-2">89FF09378001</font></td> 
    </tr> 
    </table> 
</p> 
'; 

// Parse the fragment and turn each returned key into a variable.
$result = parse_data($string);
extract($result);

// One field per line: heading first, then the three order numbers.
printf('%s<br />%s<br />%s<br />%s<br />', $headertext, $sapSON, $custPON, $legacyON);


/**
 * Extract the heading and the three order numbers from an order-reference HTML fragment.
 *
 * Cells are visited in document order. Only cells whose trimmed text starts
 * with a digit are kept; the first three of them become the SAP sales order,
 * customer P.O. and legacy order numbers, in that order.
 *
 * @param string $string HTML markup containing a <p> heading and <td> cells.
 * @return array Keys: headertext, sapSON, custPON, legacyON.
 *               Every key is always present; a value that was not found is null.
 */
function parse_data($string){
    // Spacer cells hold nothing but a pair of &nbsp; entities; dropping the pair leaves them empty.
    $string = str_replace('&nbsp;&nbsp;','',$string);

    $header = null;
    $numbers = array();

    // DOMDocument::loadHTML() refuses an empty string (a ValueError on PHP 8), so blank input is not parsed.
    if (trim($string) !== '') {
        // Let libxml record its warnings about the legacy markup internally rather than muting them with "@".
        $previous = libxml_use_internal_errors(true);

        $xml = new DOMDocument();
        $xml->loadHTML($string);

        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // Every <p> overwrites the previous one, so the last paragraph found supplies the heading.
        foreach ($xml->getElementsByTagName('p') as $p) {
            $header = trim($p->nodeValue);
        }

        foreach ($xml->getElementsByTagName('td') as $td) {
            $value = trim($td->nodeValue);
            // Compare with '' instead of empty(): empty('0') is true and would drop a legitimate "0" cell.
            // $value[0] replaces the $value{0} offset syntax, which PHP 8 no longer accepts.
            if ($value !== '' && is_numeric($value[0])) {
                $numbers[] = $value;
            }
        }
    }

    // Pad so that every key can be filled even when fewer than three cells matched.
    $numbers = array_pad($numbers, 3, null);

    return array(
        'headertext' => $header,
        'sapSON'     => $numbers[0],
        'custPON'    => $numbers[1],
        'legacyON'   => $numbers[2],
    );
}
?> 

編輯版本2(多行):

當每次迭代的表格結構都不同時,事情就變得相當複雜,但我喜歡挑戰。給你,希望它能幫上忙……

<?php 
$result = parse_data($string);

// Walk every parsed section and define one variable per entry, named after the entry's key.
foreach ($result as $key => $value) {
    foreach ($value as $key_b => $value_b) {
        ${$key_b} = $value_b;
    }
}
/* --New Available Variables-- 
    $header0 = HEADER INFO 
    $ViewObject = 6600422 
    $BPO = G37147359-000000 
    $AckDate = 2012-05-28 
    $Operations = PPS_Queue, PPS_Build, PPS_BoxAll, JPN_End 
    $header1 = EXTERNAL ORDER NUMBER REFERENCE 
    $SAPSalesOrderNumber = 0310363858 
    $CustomerPONumber = 77340892008-120413 
    $LegacyOrderNumber = 89FF09378001 
    $header2 = PRODUCTS FOR THIS WORK OBJECT/OPERATION(S) 
    $PL = 3C 
    $Product = AP703B 
    $Qty = 1 
    $Options = 
    $Serial = 2S6219000G 
    $header3 = Station Info 
    $StartStation = JPN_End 
    $Location = Done 
    $Station = 
    $BirthDateTime = 2012-05-23 14 
    $PowerCord = 
    $Voltage = 
    $header4 = MATERIAL LIST FOR THIS WORK OBJECT/OPERATION(S) 
    $PartNumber = [email protected]@ 
    $Description = OEM Generic 1U SAS Enclosure 
    $BBType = BOM 
    $MaterialLocation = ASSY 
    $SerialNumber = 2S6219000G 
*/ 

/**
 * Parse the multi-section order report into one associative array per section.
 *
 * The markup is split on "<hr>"; every non-blank piece is one section, numbered
 * from 0 in document order. Blank pieces (for example after a trailing "<hr>")
 * are skipped and do not consume a section number. Each section array holds its
 * heading under "header<N>" plus one entry per data cell, keyed by the cell's
 * label with every non-letter character removed ("Product #" => "Product").
 *
 * Two cell layouts are understood, selected by section number:
 *   - sections 0 and 3: every cell reads "Label: value";
 *   - sections 1, 2 and 4: a run of 3, 5 or 6 label cells followed by the
 *     value cells in the same column order.
 * Sections numbered 5 and above only get their heading.
 *
 * Cells whose trimmed text is empty are ignored and do not count as a column,
 * so a genuinely empty value cell shifts the values after it one label to the left.
 *
 * @param string $string HTML of the whole report.
 * @return array List of section arrays, indexed by section number.
 */
function parse_data($string){
    // Spacer cells hold nothing but a pair of &nbsp; entities; dropping the pair leaves them empty.
    $string = str_replace('&nbsp;&nbsp;','',$string);

    // Number of label cells that precede the value cells; 0 marks a "Label: value" section.
    $labelColumns = array(0 => 0, 1 => 3, 2 => 5, 3 => 0, 4 => 6);

    // Let libxml record its warnings about the legacy markup internally rather than muting them with "@".
    $previous = libxml_use_internal_errors(true);

    $html = new DOMDocument();
    $ret = array();
    $entry = 0;
    foreach (explode('<hr>', $string) as $part) {
        // DOMDocument::loadHTML() refuses an empty string (a ValueError on PHP 8).
        if (trim($part) === '') {
            continue;
        }
        $html->loadHTML($part);
        libxml_clear_errors();

        // Heading: every <p> overwrites the previous one, so the last paragraph found wins.
        foreach ($html->getElementsByTagName('p') as $p) {
            $ret[$entry]['header'.$entry] = trim($p->nodeValue);
        }

        if (isset($labelColumns[$entry])) {
            $width = $labelColumns[$entry];
            $i = 0; // counts the non-empty cells seen so far in this section
            foreach ($html->getElementsByTagName('td') as $td) {
                $value = trim($td->nodeValue);
                // Compare with '' instead of empty(): empty('0') is true and would drop a legitimate "0" cell.
                if ($value === '') {
                    continue;
                }

                if ($width === 0) {
                    // Split on the first colon only, so values such as "14:20:32" stay intact.
                    $split = explode(':', $value, 2);
                    $name = preg_replace('/[^a-zA-Z]/s', '', $split[0]);
                    $ret[$entry][$name] = isset($split[1]) ? trim($split[1]) : '';
                } elseif ($i < $width) {
                    // Label cell: park it under its column index until its value arrives.
                    $ret[$entry][$i] = $value;
                } elseif (isset($ret[$entry][$i - $width])) {
                    // Value cell: re-key it by the label of the same column, then drop the parked label.
                    // The decision is positional, so values that do not start with a digit are handled too.
                    $name = preg_replace('/[^a-zA-Z]/s', '', $ret[$entry][$i - $width]);
                    $ret[$entry][$name] = $value;
                    unset($ret[$entry][$i - $width]);
                }
                // A value cell with no matching label (more values than labels) is ignored.
                $i++;
            }
        }
        $entry++;
    }

    libxml_use_internal_errors($previous);

    return $ret;
}
?> 
+0

謝謝!會試試這個! – JudeJitsu

+0

我看到的唯一問題是:如果你的數據比示例中給出的更多,比如有更多的數據行或多個'p'標籤。 –

+0

我的情況就是這樣。這是一個多表輸出,所以我必須解析所有的數據。我會看看我能做些什麼。非常感謝你!有了這個,我可以開始解析這份HTML,也許還能找到別的解決辦法;前面還會有些顛簸 :D – JudeJitsu