2017-07-30 33 views
0

如題,我想遍歷 htmlstring 中的 <a> 元素並提取它們的 href 屬性。我該如何遍歷 htmlstring 中的 <a> 元素,並提取 href 屬性?

在以下示例中,.each 的回調從未被執行。

const cheerio = require('cheerio'); 
const htmlparser2 = require('htmlparser2'); 

// Sample page markup used as the parser input. Every <a> element in it sits
// inside the `<!--[if lt IE 9]> ... <![endif]-->` conditional comment, i.e. the
// anchors are part of comment text rather than regular markup.
// The literal must stay on one physical line: a raw line break inside a quoted
// string literal is a syntax error.
const htmlstring = '<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" lang="da" ng-app="sdk.portal"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=1, user-scalable=0"><meta sdk-core-metatags-directive><title data-ng-bind="metaTagVm.titleTag"></title><link rel="stylesheet" type="text/css" href="/public/css/sdk-06a073b7f6.styleguide.css"><link rel="stylesheet" type="text/css" href="/public/css/sdk-2f8a814242.styleguide-blessed1.css"><link rel="apple-touch-icon" sizes="180x180" href="../../public/images/apple-touch-icon.png"><link rel="icon" type="image/png" href="../../public/images/favicon-32x32.png" sizes="32x32"><link rel="icon" type="image/png" href="../../public/images/favicon-16x16.png" sizes="16x16"><link rel="manifest" href="/manifest.json"><link rel="mask-icon" href="../../public/images/safari-pinned-tab.svg" color="#b20c1c"><meta name="msapplication-TileColor" content="#b20c1c"><meta name="msapplication-TileImage" content="../../public/images/mstile-144x144.png"><meta name="theme-color" content="#ffffff"><script src="/public/js/vendor-f84d227302.js"></script><script src="/public/js/sdk-a8dae725ad.bundle.js"></script><base href="/" /><sdk-core-analytics></sdk-core-analytics></head><body><div><div ui-view="coreView"><div style="width: 30%; margin-left: auto; margin-right: auto; text-align: center"><!--[if lt IE 9]><h2>Opdatér din browser for at bruge sundhed.dk</h2><p>Det ser ud til, at du bruger et browser-program af ældre dato.<br/>Sundhed.dk’s mange funktioner kræver nu en nyere browser.</p><p/>Download en opdateret browser her: <hr/><a href="https://www.microsoft.com/da-dk/download/details.aspx?id=29254">Internet Explorer</a><hr/><a href="https://www.google.com/chrome/browser/desktop/index.html">Chrome</a><hr/><a href="https://www.mozilla.org/da/firefox/new/">Firefox</a><hr/><a href="http://www.opera.com/da/download">Opera</a></p><![endif]--><!--[if (gte IE 9) | (!IE)]><p>Vent et øjeblik</p><![endif]--></div></div></div><div id="includes"><div id="includePlaceholder"></div></div></body></html>';
// Build a DOM tree from the markup with htmlparser2 and hand it to cheerio.
const dom = htmlparser2.parseDOM(htmlstring);
const $ = cheerio.load(dom);

// Visit each element matched by the `a` selector and print its href attribute.
$('a').each((idx, el) => {
    console.log('in each');
    const att = $(el).attr('href');
    console.log(att);
});

如果我查看 $('a') 的返回值,得到的是下列對象:

initialize { 
    options: 
    { withDomLvl1: true, 
    normalizeWhitespace: false, 
    xml: false, 
    decodeEntities: true }, 
    _root: 
    initialize { 
    '0': 
     { type: 'root', 
     name: 'root', 
     namespace: 'http://www.w3.org/1999/xhtml', 
     attribs: {}, 
     'x-attribsNamespace': {}, 
     'x-attribsPrefix': {}, 
     children: [Object], 
     parent: null, 
     prev: null, 
     next: null }, 
    options: 
     { withDomLvl1: true, 
     normalizeWhitespace: false, 
     xml: false, 
     decodeEntities: true }, 
    length: 1, 
    _root: [Circular] }, 
    length: 0, 
    prevObject: 
    initialize { 
    '0': 
     { type: 'root', 
     name: 'root', 
     namespace: 'http://www.w3.org/1999/xhtml', 
     attribs: {}, 
     'x-attribsNamespace': {}, 
     'x-attribsPrefix': {}, 
     children: [Object], 
     parent: null, 
     prev: null, 
     next: null }, 
    options: 
     { withDomLvl1: true, 
     normalizeWhitespace: false, 
     xml: false, 
     decodeEntities: true }, 
    length: 1, 
    _root: [Circular] } } 

這看起來是一個 cheerio 選擇對象。那麼我到底哪裏做錯了?還是說這其實是 cheerio 的 bug?我懷疑是後者,因爲我的寫法看起來完全符合預期的用法。

回答

0

我在 cheeriojs 的 issue 中提出了這個問題(https://github.com/cheeriojs/cheerio/issues/1063),有人指出所有的鏈接都位於一個條件註釋裏面,所以我的代碼應該改成:

const cheerio = require('cheerio'); 
const htmlparser2 = require('htmlparser2'); 

// Sample page markup used as the parser input. In this version the <h2>, <p>
// and <a> elements are plain markup (no `[if lt IE 9]` conditional comment
// around them); only the trailing `[if (gte IE 9) | (!IE)]` comment remains.
// The literal must stay on one physical line: a raw line break inside a quoted
// string literal is a syntax error.
const htmlstring = '<html lang="da"><head><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=1, user-scalable=0"><meta sdk-core-metatags-directive><title data-ng-bind="metaTagVm.titleTag"></title><link rel="stylesheet" type="text/css" href="/public/css/sdk-06a073b7f6.styleguide.css"><link rel="stylesheet" type="text/css" href="/public/css/sdk-2f8a814242.styleguide-blessed1.css"><link rel="apple-touch-icon" sizes="180x180" href="../../public/images/apple-touch-icon.png"><link rel="icon" type="image/png" href="../../public/images/favicon-32x32.png" sizes="32x32"><link rel="icon" type="image/png" href="../../public/images/favicon-16x16.png" sizes="16x16"><link rel="manifest" href="/manifest.json"><link rel="mask-icon" href="../../public/images/safari-pinned-tab.svg" color="#b20c1c"><meta name="msapplication-TileColor" content="#b20c1c"><meta name="msapplication-TileImage" content="../../public/images/mstile-144x144.png"><meta name="theme-color" content="#ffffff"><script src="/public/js/vendor-f84d227302.js"></script><script src="/public/js/sdk-a8dae725ad.bundle.js"></script><base href="/" /><sdk-core-analytics></sdk-core-analytics></head><body><div><div ui-view="coreView"><div style="width: 30%; margin-left: auto; margin-right: auto; text-align: center"><h2>Opdatér din browser for at bruge sundhed.dk</h2><p>Det ser ud til, at du bruger et browser-program af ældre dato.<br/>Sundhed.dk’s mange funktioner kræver nu en nyere browser.</p><p/>Download en opdateret browser her: <hr/><a href="https://www.microsoft.com/da-dk/download/details.aspx?id=29254">Internet Explorer</a><hr/><a href="https://www.google.com/chrome/browser/desktop/index.html">Chrome</a><hr/><a href="https://www.mozilla.org/da/firefox/new/">Firefox</a><hr/><a href="http://www.opera.com/da/download">Opera</a></p><!--[if (gte IE 9) | (!IE)]><p>Vent et øjeblik</p><![endif]--></div></div></div><div id="includes"><div id="includePlaceholder"></div></div></body></html>';
// Build a DOM tree from the markup with htmlparser2 and hand it to cheerio.
const dom = htmlparser2.parseDOM(htmlstring);
const $ = cheerio.load(dom);

// Visit each element matched by the `a` selector and print its href attribute.
$('a').each((idx, el) => {
    console.log('in each');
    const att = $(el).attr('href');
    console.log(att);
});
相關問題