2013-07-07 161 views
1

我想爲一個擁有超過 30,000,000 個頁面的網站創建站點地圖。該網站每天都會更新、刪除和添加頁面。(標題:大型網站的 PHP 站點地圖)

我找到了下面這個 PHP 腳本,並打算通過 cron 作業定期運行它。

Sitemap php script

我的所有 URI 都存放在表「myuri」的「uri」列中,條目的寫法例如「/this-is-a-page.html」。我需要在腳本中添加哪些參數,才能讓它針對我的表運行?

<?php 

/* 
* author:  Kyle Gadd 
* documentation: http://www.php-ease.com/classes/sitemap.html 
* 
* This program is free software: you can redistribute it and/or modify 
* it under the terms of the GNU General Public License as published by 
* the Free Software Foundation, either version 3 of the License, or 
* (at your option) any later version. 
* 
* This program is distributed in the hope that it will be useful, 
* but WITHOUT ANY WARRANTY; without even the implied warranty of 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
* GNU General Public License for more details. 
* 
* You should have received a copy of the GNU General Public License 
* along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ 

/**
 * Writes sitemap XML files (optionally gzip-compressed) plus a sitemap index.
 *
 * URLs are buffered in memory and flushed to
 * "sitemap-{page}-{n}.xml[.gz]" every MAX_URLS entries, whenever the page
 * name changes, on close() and on destruction. Every flushed file is
 * registered in "sitemap-index.xml[.gz]".
 *
 * Two constants must be defined by the caller before use:
 *  - BASE_URL: prefix prepended to every URL and sitemap file name in the XML.
 *  - BASE_URI: filesystem path prefix under which the files are written.
 */
class Sitemap {

    /** Number of buffered URLs that triggers an automatic flush to disk. */
    const MAX_URLS = 50000;

    /** @var string File name suffix: '.gz' when compressing, '' otherwise. */
    private $compress;

    /** @var string Current page (section) name used in the file names. */
    private $page = 'index';

    /** @var int Sequence number of the next file written for the current page. */
    private $index = 1;

    /** @var array Buffered entries: a plain string (loc only) or an array of tag => value. */
    private $urls = array();

    /**
     * @param bool $compress Whether to gzip the generated files (adds a ".gz" suffix).
     */
    public function __construct ($compress=true) {
        ini_set('memory_limit', '75M'); // 50M required per tests
        $this->compress = ($compress) ? '.gz' : '';
    }

    /**
     * Starts a new named section: flushes the pending URLs of the previous
     * section and restarts the file numbering at 1.
     *
     * @param string $name Section name, used in "sitemap-{name}-{n}.xml".
     */
    public function page ($name) {
        $this->save();
        $this->page = $name;
        $this->index = 1;
    }

    /**
     * Queues one URL for the current section.
     *
     * @param string $url        Path appended to BASE_URL.
     * @param string $lastmod    Any date string strtotime() understands; empty or
     *                           unparseable values are omitted from the output.
     * @param string $changefreq One of always|hourly|daily|weekly|monthly|yearly|never
     *                           (case-insensitive); anything else is omitted.
     * @param mixed  $priority   Numeric value whose absolute value is <= 1; anything
     *                           else is omitted.
     */
    public function url ($url, $lastmod='', $changefreq='', $priority='') {
        $url = htmlspecialchars(BASE_URL . $url);
        // An unparseable date yields false here instead of being written as 1970-01-01.
        $lastmod = $this->format_date($lastmod);
        $changefreq = (!empty($changefreq) && in_array(strtolower($changefreq), array('always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'))) ? strtolower($changefreq) : false;
        $priority = (!empty($priority) && is_numeric($priority) && abs($priority) <= 1) ? round(abs($priority), 1) : false;
        if (!$lastmod && !$changefreq && !$priority) {
            $this->urls[] = $url;
        } else {
            $entry = array('loc' => $url);
            if ($lastmod !== false) $entry['lastmod'] = $lastmod;
            if ($changefreq !== false) $entry['changefreq'] = $changefreq;
            if ($priority !== false) $entry['priority'] = ($priority < 1) ? $priority : '1.0';
            $this->urls[] = $entry;
        }
        // Flush as soon as the buffer holds a full file's worth of URLs.
        if (count($this->urls) >= self::MAX_URLS) {
            $this->save();
        }
    }

    /**
     * Flushes any pending URLs and notifies the search engines.
     */
    public function close() {
        $this->save();
        $this->ping_search_engines();
    }

    /**
     * Normalises a date string to "Y-m-d".
     *
     * @param mixed $value Date string (or an empty value).
     * @return string|false Formatted date, or false when empty or unparseable.
     */
    private function format_date ($value) {
        if (empty($value)) return false;
        $time = strtotime($value);
        return ($time !== false) ? date('Y-m-d', $time) : false;
    }

    /**
     * Writes $contents to $path, raising a warning instead of operating on an
     * invalid file handle when the target cannot be written.
     *
     * @param string $path     Destination file.
     * @param string $contents Raw bytes to write.
     * @return bool True on success.
     */
    private function write_file ($path, $contents) {
        if (file_put_contents($path, $contents) === false) {
            trigger_error("Unable to write sitemap file: {$path}", E_USER_WARNING);
            return false;
        }
        return true;
    }

    /**
     * Writes the buffered URLs to the next numbered file of the current
     * section, removes stale higher-numbered files left over from an earlier
     * run, and registers the new file in the sitemap index.
     */
    private function save() {
        if (empty($this->urls)) return;
        $file = "sitemap-{$this->page}-{$this->index}.xml{$this->compress}";
        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
        foreach ($this->urls as $url) {
            $xml .= ' <url>' . "\n";
            if (is_array($url)) {
                foreach ($url as $key => $value) $xml .= " <{$key}>{$value}</{$key}>\n";
            } else {
                $xml .= " <loc>{$url}</loc>\n";
            }
            $xml .= ' </url>' . "\n";
        }
        $xml .= '</urlset>' . "\n";
        // Release the buffer before compressing to keep peak memory down.
        $this->urls = array();
        if (!empty($this->compress)) $xml = gzencode($xml, 9);
        // Do not index a file that could not be written.
        if (!$this->write_file(BASE_URI . $file, $xml)) return;
        $this->index++;
        // Files numbered from the new index upwards belong to a previous, longer run.
        $num = $this->index;
        while (file_exists(BASE_URI . "sitemap-{$this->page}-{$num}.xml{$this->compress}")) {
            unlink(BASE_URI . "sitemap-{$this->page}-{$num}.xml{$this->compress}");
            $num++;
        }
        $this->index($file);
    }

    /**
     * Rewrites the sitemap index: keeps existing entries whose files still
     * exist, then adds or refreshes $file with today's date.
     *
     * @param string $file Sitemap file name, relative to BASE_URI / BASE_URL.
     */
    private function index ($file) {
        $sitemaps = array();
        $index = "sitemap-index.xml{$this->compress}";
        if (file_exists(BASE_URI . $index)) {
            $lines = (!empty($this->compress)) ? gzfile(BASE_URI . $index) : file(BASE_URI . $index);
            $entries = ($lines !== false) ? $this->xml_tag(implode('', $lines), array('sitemap')) : array();
            foreach ($entries as $entry) {
                $loc = $this->xml_tag($entry, 'loc');
                // Skip malformed entries; an empty loc would otherwise match the
                // BASE_URI directory itself in file_exists() below.
                if ($loc === false) continue;
                $loc = str_replace(BASE_URL, '', $loc);
                if ($loc === '') continue;
                $lastmod = $this->format_date($this->xml_tag($entry, 'lastmod'));
                if ($lastmod === false) $lastmod = date('Y-m-d');
                if (file_exists(BASE_URI . $loc)) $sitemaps[$loc] = $lastmod;
            }
        }
        $sitemaps[$file] = date('Y-m-d');
        $xml = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
        $xml .= '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
        foreach ($sitemaps as $loc => $lastmod) {
            $xml .= ' <sitemap>' . "\n";
            $xml .= ' <loc>' . BASE_URL . $loc . '</loc>' . "\n";
            $xml .= ' <lastmod>' . $lastmod . '</lastmod>' . "\n";
            $xml .= ' </sitemap>' . "\n";
        }
        $xml .= '</sitemapindex>' . "\n";
        if (!empty($this->compress)) $xml = gzencode($xml, 9);
        $this->write_file(BASE_URI . $index, $xml);
    }

    /**
     * Minimal tag extractor for the index files this class writes itself.
     *
     * @param string       $xml XML text to search.
     * @param string|array $tag Tag name: a string returns the content of the
     *                          first <tag>...</tag>; array('tag') returns the
     *                          contents of every occurrence.
     * @param int|string   $end Set to the offset just past the matched closing tag.
     * @return string|array|false Content string, list of contents, or false when
     *                            the tag is missing or has no closing tag.
     */
    private function xml_tag ($xml, $tag, &$end='') {
        if (is_array($tag)) {
            $tags = array();
            while (($value = $this->xml_tag($xml, $tag[0], $end)) !== false) {
                $tags[] = $value;
                $xml = (string) substr($xml, $end);
            }
            return $tags;
        }
        $xml = (string) $xml;
        $open = "<{$tag}>";
        $close = "</{$tag}>";
        $pos = strpos($xml, $open);
        if ($pos === false) return false;
        $start = $pos + strlen($open);
        $stop = strpos($xml, $close, $start);
        // An unterminated tag is reported as missing rather than yielding a garbage slice.
        if ($stop === false) return false;
        $end = $stop + strlen($close);
        return substr($xml, $start, $stop - $start);
    }

    /**
     * Sends a HEAD request with the sitemap index URL to each search engine
     * and raises a notice for every response that is not HTTP 200.
     *
     * NOTE(review): these ping endpoints come from the original script; verify
     * that each search engine still accepts them.
     */
    public function ping_search_engines() {
        $sitemap = BASE_URL . 'sitemap-index.xml' . $this->compress;
        $engines = array();
        $engines['www.google.com'] = '/webmasters/tools/ping?sitemap=' . urlencode($sitemap);
        $engines['www.bing.com'] = '/webmaster/ping.aspx?siteMap=' . urlencode($sitemap);
        $engines['submissions.ask.com'] = '/ping?sitemap=' . urlencode($sitemap);
        foreach ($engines as $host => $path) {
            // Bounded timeouts so one unreachable host cannot stall a cron run.
            $fp = fsockopen($host, 80, $errno, $errstr, 10);
            if (!$fp) continue;
            stream_set_timeout($fp, 10);
            $send = "HEAD $path HTTP/1.1\r\n";
            $send .= "HOST: $host\r\n";
            $send .= "CONNECTION: Close\r\n\r\n";
            fwrite($fp, $send);
            $http_response = fgets($fp, 128);
            fclose($fp);
            // The status line looks like "HTTP/1.1 200 OK"; tolerate an empty or short read.
            $parts = ($http_response !== false) ? explode(' ', trim($http_response)) : array();
            $response = isset($parts[0]) ? $parts[0] : '';
            $code = isset($parts[1]) ? $parts[1] : '';
            if ((int) $code !== 200) trigger_error ("{$host} ping was unsuccessful.<br />Code: {$code}<br />Response: {$response}");
        }
    }

    /**
     * Flushes any URLs still buffered when the object is destroyed.
     */
    public function __destruct() {
        $this->save();
    }

}

?> 

該腳本的文檔頁面上已經給出了一個使用示例:

<?php 

require_once ('php/classes/Sitemap.php'); 

// Build the sitemap with the constructor's default settings.
$sitemap = new Sitemap();

// "pages" section: one entry per row, using the creation date as lastmod.
if (get('pages')) {
    $sitemap->page('pages');
    $result = db_query ("SELECT url, created FROM pages"); // 20 pages
    while ($row = $result->fetch_row()) {
        $sitemap->url($row[0], $row[1], 'yearly');
    }
}

// "posts" section: one entry per row, using the update date as lastmod.
if (get('posts')) {
    $sitemap->page('posts');
    $result = db_query ("SELECT url, updated FROM posts"); // 70,000 posts
    while ($row = $result->fetch_row()) {
        $sitemap->url($row[0], $row[1], 'monthly');
    }
}

// Flush whatever is still buffered and ping the search engines.
$sitemap->close();
unset ($sitemap);

/**
 * Tells whether the "update" query-string parameter mentions $name.
 *
 * The match is a plain substring test, so "?update=pages,posts" selects
 * both the "pages" and the "posts" sections.
 *
 * @param string $name Section name to look for.
 * @return bool True when $_GET['update'] is a string containing $name.
 */
function get ($name) {
    // A non-string value (e.g. "?update[]=x") must not reach strpos(): it would
    // raise a TypeError on PHP 8 and be reported as a match on older versions.
    if (!isset($_GET['update']) || !is_string($_GET['update'])) {
        return false;
    }
    return strpos($_GET['update'], $name) !== false;
}

?> 

回答

0

我會改變這部分....

// Only runs when the request selects the "pages" section.
if (get('pages')) {
    $sitemap->page('pages');
    $result = db_query ("SELECT uri FROM myuri");
    // NOTE(review): mysql_fetch_row() belongs to the ext/mysql extension, which was
    // removed in PHP 7 - confirm what db_query() actually returns before relying on it.
    while ($row = mysql_fetch_row($result)) {
        // No last-modified date is available, so lastmod is left empty.
        $sitemap->url($row[0], '', 'yearly');
    }
}

我不確定 $updated 是否必需?看起來這個函數只是把它默認爲空字符串……不過,你也可以從表中的時間戳列讀取最後更新日期,然後把它傳給函數,替換我寫成 '' 的那個參數。

另外,刪除這部分:

// "posts" section of the usage example: when the request selects "posts", each
// row of the posts table is queued with its update date and a monthly change frequency.
if (get('posts')) { 
$sitemap->page('posts'); 
$result = db_query ("SELECT url, updated FROM posts"); // 70,000 posts 
while (list($url, $updated) = $result->fetch_row()) { 
$sitemap->url($url, $updated, 'monthly'); 
} 
} 
+0

由於到目前爲止它仍然無法正常工作,我按當前狀態另外發了一個新帖子 – VolkaRacho

+0

已改成…… while (list($url, $created) = mysql_fetch_row($result)) – KyleK

+0

現在運行得非常完美!$this->fetch_row() 非常感謝 :) – VolkaRacho