我認爲以下幾點應該會有所幫助 - 您需要調整XPath查詢,以便定位特定的表格和特定的單元格內容,但主代碼似乎可以正常工作。我懷疑原始代碼的問題是URL爲https
,通常在進行卷曲請求時需要額外的配置設置。有curlrequest
函數中的設置可以刪除,我只是從另一個腳本中複製了這些設置。
改變路徑以$cacert
到cacert.pem
您的系統上的副本或到live version on curl.haxx.se
$url = 'https://www.sanita.puglia.it/monitorpo/aslfg/monitorps-web/monitorps/monitorPSperASL.do?codNazionale=160115';
function _curlrequest($url=null, $options=null){
$cacert='c:/wwwroot/cacert.pem';
$vbh = fopen('php://temp', 'w+');
$res=array(
'response' => null,
'verbose' => null,
'info' => array('http_code' => 100),
'headers' => null,
'errors' => null
);
if(is_null($url)) return (object)$res;
session_write_close();
/* Initialise curl request object */
$curl=curl_init();
if(parse_url($url,PHP_URL_SCHEME)=='https'){
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curl, CURLOPT_CAINFO, $cacert);
}
/* Define standard options */
curl_setopt($curl, CURLOPT_URL,trim($url));
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_FAILONERROR, true);
curl_setopt($curl, CURLOPT_HEADER, false);
curl_setopt($curl, CURLINFO_HEADER_OUT, false);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_BINARYTRANSFER, true);
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 20);
curl_setopt($curl, CURLOPT_TIMEOUT, 60);
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36');
curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
curl_setopt($curl, CURLOPT_ENCODING, '');
curl_setopt($curl,CURLOPT_VERBOSE,true);
curl_setopt($curl,CURLOPT_NOPROGRESS,true);
curl_setopt($curl,CURLOPT_STDERR,$vbh);
/* Assign runtime parameters as options */
if(isset($options) && is_array($options)){
foreach($options as $param => $value) curl_setopt($curl, $param, $value);
}
/* Execute the request and store responses */
$res=(object)array(
'response' => curl_exec($curl),
'info' => (object)curl_getinfo($curl),
'errors' => curl_error($curl)
);
rewind($vbh);
$res->verbose=stream_get_contents($vbh);
fclose($vbh);
curl_close($curl);
return $res;
}
function getdom($data=false, $debug=false){
try{
if(!$data)throw new Exception('No data passed whilst trying to invoke DOMDocument');
libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->validateOnParse=false;
$dom->standalone=true;
$dom->strictErrorChecking=false;
$dom->recover=true;
$dom->formatOutput=false;
$dom->loadHTML($data);
$errors=libxml_get_errors();
libxml_clear_errors();
return !empty($errors) && $debug ? $errors : $dom;
}catch(Exception $e){
echo $e->getMessage();
}
}
$obj=_curlrequest($url);
if($obj->info->http_code==200){
$dom=getdom($obj->response);
$xp=new DOMXPath($dom);
$query='//div[ contains(@class,"cRiga3 boxtriageS") ]';
$col=$xp->query($query);
if(!empty($col) && $col->length > 0){
foreach($col as $node)echo $node->nodeValue . '<br />';
}
}
此輸出
2
20
37
>1h
1
2
24
10
5
7
32
29
0
3
25
5
0
0
6
2