2014-09-25 62 views
0

解析時從 XML 中過濾元素:我使用 Node.js 的 XML 解析器 sax-js 來獲取 XML 的內容。該 XML 的結構如下:

<item> 
    <title>Some title</title> 
    <guid isPermaLink="false">http://example.com</guid> 
</item> 
<item> 
    <title>VIDEO: Some title</title> 
    <guid isPermaLink="false">http://example1.com</guid> 
</item> 

我想取得所有 title 不是以 VIDEO 開頭的 item 之下的 URL。

目前,它給了我所有的網址。

我的代碼目前是:

'use strict';
var sax = require('sax-js');
var request = require('request');

var href = 'http://some-xml-url.xml';

// Text captured after every <guid isPermaLink="false"> open tag.
// The <title> element is never inspected, so no item is filtered out.
var urls = [];
// Set when a matching <guid> opens; cleared once the next text node is stored.
var isTextPending = false;

/** Prints a parser error with console.error. */
function reportError(err) {
    console.error(err);
}

/** Stores the first text node that follows a flagged <guid>; ignores all other text. */
function handleText(chunk) {
    if (!isTextPending) {
        return;
    }
    isTextPending = false;
    urls.push(chunk);
}

/** Flags the upcoming text node for capture when a <guid isPermaLink="false"> opens. */
function handleOpenTag(tag) {
    var isWantedGuid = tag.name === 'guid' && tag.attributes.isPermaLink === 'false';
    if (isWantedGuid) {
        isTextPending = true;
    }
}

var saxStream = sax.createStream(true);
saxStream.on('error', reportError);
saxStream.ontext = handleText;
saxStream.on('opentag', handleOpenTag);
// Assigns an empty function to the stream's `end` property.
saxStream.end = function () {};

request(href).pipe(saxStream);
+0

如果您確定 XML 的結構,可以使用正則表達式來提取 URL。否則,您可以[將 XML 轉換爲 JSON](https://www.npmjs.com/package/fast-xml-parser),然後遍歷 `item`,檢查 `item[i].title` 是否不以 'VIDEO' 開頭,再將 `item[i].guid` 存儲到某個數組中。 – 2017-11-30 13:58:12

回答

0

您需要處理更多的狀態,而不僅僅是 'isTextPending'。

下面是一個示例(注意,它也處理 'closetag' 事件,以便在處理過程中排除標記與標記之間的文本)。

'use strict';
var sax = require('sax-js');
var request = require('request');

var href = 'http://some-xml-url.xml';

// URLs of every <item> whose <title> does not start with "VIDEO".
var urls = [];
// Name of the element whose character data is currently being read;
// undefined while the parser is between elements.
var tagName;

// Per-item state, reset whenever an <item> opens.
var isInsideItem = false;
var isValidGuid = false;
var titleText = '';
var guidUrl = '';

var saxStream = sax.createStream(true);
saxStream.on('error', function (e) {
    console.error(e);
});

/**
 * Accumulates character data for the <title> and <guid> of the current item.
 *
 * The parser may deliver one text node (or CDATA section) in several chunks,
 * so the pieces are appended rather than assigned.
 *
 * @param {string} text - chunk of character data reported by the parser.
 */
function collectText(text) {
    if (!isInsideItem) {
        // Ignore titles/guids that live outside an <item> (e.g. feed-level title).
        return;
    }
    if (tagName === 'title') {
        titleText += text;
    } else if (tagName === 'guid' && isValidGuid) {
        guidUrl += text;
    }
}

saxStream.on('text', collectText);
// Feeds frequently wrap titles and links in <![CDATA[ ... ]]>.
saxStream.on('cdata', collectText);

saxStream.on('opentag', function (node) {
    tagName = node.name;
    switch (node.name) {
    case 'item':
        isInsideItem = true;
        isValidGuid = false;
        titleText = '';
        guidUrl = '';
        break;
    case 'title':
        titleText = '';
        break;
    case 'guid':
        isValidGuid = (node.attributes.isPermaLink === 'false');
        if (isValidGuid) {
            // The last qualifying <guid> of an item wins.
            guidUrl = '';
        }
        break;
    }
});

// sax passes the tag *name* (a string) to closetag listeners, not a node object.
saxStream.on('closetag', function (name) {
    // Text that follows a closing tag belongs to no tracked element.
    tagName = undefined;

    if (name !== 'item' || !isInsideItem) {
        return;
    }
    isInsideItem = false;

    // Decide once per item, after both <title> and <guid> have been fully read,
    // so element order and chunked text cannot cause missing or duplicate URLs.
    // Surrounding whitespace is ignored for both the prefix test and the URL.
    var title = titleText.trim();
    var url = guidUrl.trim();
    if (url !== '' && title !== '' && title.indexOf('VIDEO') !== 0) {
        urls.push(url);
    }
});

// Listen for the 'end' event instead of overwriting saxStream.end(): pipe()
// calls end() on the destination, and that method must run to close the parser.
saxStream.on('end', function () {
    console.log('Result: ' + JSON.stringify(urls));
});

request(href).pipe(saxStream);