2015-02-06 68 views
1

如果站點地圖中的網址數量超過 maxURLs,我想把它分割成多個較小的站點地圖。以下示例應該在站點地圖包含的網址多於 maxURLs 時將其拆分。也就是說:如果站點地圖中的網址數量超過允許的最大數量,就將該站點地圖分割成更多站點地圖。

import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.CharacterData;
import org.w3c.dom.*;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

    /**
     * Splits a sitemap document into several smaller sitemap documents, each
     * holding at most a given number of {@code <url>} entries.
     *
     * <p>Every generated part is produced by copying the original root element
     * (with its attributes, e.g. the namespace declaration) into a fresh DOM
     * document, importing the selected {@code <url>} elements together with all
     * of their children, and serializing that document. No XML is assembled by
     * string concatenation, so every part is a complete, well-formed document.
     */
    public class SiteMapSplitter {

        /** Number of url entries per generated sitemap used by the demo entry point. */
        private static final int DEFAULT_MAX_URLS = 1;

        /**
         * Demo entry point: splits a hard-coded two-entry sitemap and prints the parts.
         *
         * @param args ignored
         */
        public static void main(String[] args) {

            String sitemapStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
                "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n" +
                "<url>\n" +
                "<loc>test1.html</loc>\n" +
                "<lastmod>today</lastmod>\n" +
                "<changefreq>daily</changefreq>\n" +
                "<priority>1.0</priority>\n" +
                "</url>\n" +
                "<url>\n" +
                "<loc>test2.html</loc>\n" +
                "<lastmod>yesterday</lastmod>\n" +
                "<changefreq>daily</changefreq>\n" +
                "<priority>1.0</priority>\n" +
                "</url></urlset>";
            try {
                splitSitemap(sitemapStr);
            } catch (ParserConfigurationException e) {
                e.printStackTrace();
            }
        }

        /**
         * Splits the sitemap using {@link #DEFAULT_MAX_URLS} and prints every part to
         * standard output. Nothing is printed when the sitemap already fits into a
         * single part, i.e. when no split is necessary.
         *
         * @param sitemapStr the sitemap XML to split
         * @throws ParserConfigurationException if no XML parser can be created
         * @throws IllegalArgumentException if {@code sitemapStr} is not well-formed XML
         */
        static private void splitSitemap(String sitemapStr) throws ParserConfigurationException {
            List<String> parts = splitSitemap(sitemapStr, DEFAULT_MAX_URLS);
            // A single part means the sitemap did not exceed the limit.
            if (parts.size() > 1) {
                for (String part : parts) {
                    System.out.println(part);
                }
            }
        }

        /**
         * Splits the sitemap into consecutive parts of at most {@code maxURLs} url
         * entries each, preserving document order.
         *
         * @param sitemapStr the sitemap XML to split
         * @param maxURLs maximum number of url entries per part; must be at least 1
         * @return the serialized parts in document order; a single part when the
         *         sitemap already fits, an empty list when it has no url entries
         * @throws ParserConfigurationException if no XML parser can be created
         * @throws NullPointerException if {@code sitemapStr} is {@code null}
         * @throws IllegalArgumentException if {@code maxURLs < 1} or the input is not
         *         well-formed XML
         */
        static List<String> splitSitemap(String sitemapStr, int maxURLs) throws ParserConfigurationException {
            if (sitemapStr == null) {
                throw new NullPointerException("sitemapStr");
            }
            if (maxURLs < 1) {
                throw new IllegalArgumentException("maxURLs must be >= 1 but was " + maxURLs);
            }

            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // Namespace awareness keeps the sitemap namespace attached to the copied
            // nodes so the serializer emits the xmlns declaration on each part.
            factory.setNamespaceAware(true);
            DocumentBuilder db = factory.newDocumentBuilder();

            Document doc;
            try {
                doc = db.parse(new InputSource(new StringReader(sitemapStr)));
            } catch (SAXException e) {
                throw new IllegalArgumentException("Sitemap is not well-formed XML", e);
            } catch (IOException e) {
                // Reading from an in-memory string is not expected to fail.
                throw new IllegalStateException("Unexpected I/O failure while reading the sitemap", e);
            }

            // "*" matches url elements regardless of namespace or prefix.
            NodeList nodes = doc.getElementsByTagNameNS("*", "url");
            int total = nodes.getLength();

            List<String> parts = new ArrayList<String>();
            int start = 0;
            while (start < total) {
                // Computed without start + maxURLs overflowing for very large limits.
                int end = (total - start > maxURLs) ? start + maxURLs : total;

                Document partDoc = db.newDocument();
                // Shallow import: copies the root element and its attributes only.
                Node root = partDoc.importNode(doc.getDocumentElement(), false);
                partDoc.appendChild(root);
                for (int k = start; k < end; k++) {
                    // Deep import: copies the url element with all of its children.
                    root.appendChild(partDoc.importNode(nodes.item(k), true));
                }
                parts.add(serialize(partDoc));
                start = end;
            }
            return parts;
        }

        /** Serializes a DOM document to an XML string with an XML declaration. */
        private static String serialize(Document document) {
            try {
                Transformer transformer = TransformerFactory.newInstance().newTransformer();
                transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
                StringWriter out = new StringWriter();
                transformer.transform(new DOMSource(document), new StreamResult(out));
                return out.toString();
            } catch (TransformerException e) {
                throw new IllegalStateException("Could not serialize sitemap part", e);
            }
        }

    }

問題是 `Element element = (Element) nodes.item(i); smURLsBuilder.append(element);`

並沒有把整個元素(在這個例子中是 url 及其子元素)追加到 smURLsBuilder。要怎麼做才能做到這一點?

回答

0

您應該考慮用面向對象的方式來處理站點地圖:可以使用數據綁定(JAXB),或者使用更簡短的數據投影(data projection)方式(披露:我隸屬於該項目)。這樣你就不需要通過字符串連接來創建 XML。

/**
 * Demo that prints one sitemap per url entry of a hard-coded sitemap,
 * using an XML projection instead of string concatenation.
 */
public class SitemapSplitter {

    /** The sitemap that is split by {@link #main(String...)}. */
    static String sitemapStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n" +
            "<url>\n" +
            "<loc>test1.html</loc>\n" +
            "<lastmod>today</lastmod>\n" +
            "<changefreq>daily</changefreq>\n" +
            "<priority>1.0</priority>\n" +
            "</url>\n" +
            "<url>\n" +
            "<loc>test2.html</loc>\n" +
            "<lastmod>yesterday</lastmod>\n" +
            "<changefreq>daily</changefreq>\n" +
            "<priority>1.0</priority>\n" +
            "</url></urlset>";

    /** Projection over a sitemap document. */
    public interface Sitemap {
        /**
         * Sets the url entries of this sitemap.
         * NOTE(review): presumably replaces the nodes at /urlset/url; confirm against the XMLBeam docs.
         *
         * @param urls the url nodes to write
         * @return a sitemap projection, allowing the call to be chained
         */
        @XBWrite("/urlset/url")
        Sitemap setUrls(List<? extends Node> urls);
    }

    /**
     * Reads all url nodes from {@link #sitemapStr} and prints a separate
     * sitemap for each of them.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        XBProjector projector = new XBProjector(Flags.TO_STRING_RENDERS_XML);

        // Collect every url node of the existing sitemap.
        List<Node> urlNodes = projector
                .onXMLString(sitemapStr)
                .evalXPath("/xbdefaultns:urlset/xbdefaultns:url")
                .asListOf(Node.class);

        for (int idx = 0; idx < urlNodes.size(); idx++) {
            List<Node> singleUrl = Collections.singletonList(urlNodes.get(idx));
            // Start from a fresh projection of the source for every output sitemap.
            Sitemap template = projector.onXMLString(sitemapStr).createProjection(Sitemap.class);
            Sitemap newSitemap = template.setUrls(singleUrl);
            System.out.println(newSitemap);
        }
    }
}

這個程序打印出

<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> 
<url> 
<loc>test1.html</loc> 
<lastmod>today</lastmod> 
<changefreq>daily</changefreq> 
<priority>1.0</priority> 
</url> 
</urlset> 

<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> 
<url> 
<loc>test2.html</loc> 
<lastmod>yesterday</lastmod> 
<changefreq>daily</changefreq> 
<priority>1.0</priority> 
</url> 
</urlset> 
+0

如何讓每個站點地圖中的網址數量可變? – Ronald 2015-02-07 13:14:24

+0

我的例子使用'Collections.singletonList(urlNode)'創建一個帶有一個URL的List。如果你想有3個URL,你需要創建其他'List'實例(例如LinkedList),並在你用你的列表調用'setUrls(...)'之前用3個URL填充它。這有幫助嗎? – Cfx 2015-02-07 13:20:43

+0

我已經添加了依賴 org.xmlbeam:xmlprojector:1.4.6,並嘗試了你的代碼。結果拋出異常:Exception in thread "main" java.lang.NoClassDefFoundError: javax/jcr/Node – Ronald 2015-02-07 13:27:21