2012-10-24 33 views
2

我試圖用 nodejs(搭配 cheerio)從一個 URL 抓取 Open Graph 元數據,使用的是下面的代碼。(標題:在 nodejs 中抓取 Open Graph 的算法)

我需要填充這樣一個對象:var result={};

// Walk every entry of metalist and copy the tags whose `property` starts with
// "og" into result, keyed by the second ':'-separated segment of the property
// (e.g. "og:video:width" is stored under "video"). Repeated keys are gathered
// into an array, which is what flattens og:video:* into a plain list.
// NOTE(review): utils.isEmpty is defined elsewhere; presumably it is true for
// undefined/empty values - confirm against its implementation.
for (var i = 0; i < metalist.length; i++) {
    var attribs = metalist[i].attribs;

    // Skip tags that lack either a property or a content attribute.
    if (utils.isEmpty(attribs.property) || utils.isEmpty(attribs.content)) {
        continue;
    }

    // Only properties that begin with "og" are of interest.
    if (attribs.property.indexOf('og') != 0) {
        continue;
    }

    var ogname = attribs.property.split(':');
    var property = ogname[1];
    var content = attribs.content;

    if (utils.isEmpty(result[property])) {
        // First value seen for this key: store it as-is.
        result[property] = content;
    } else if (result[property].push) {
        // Already an array: append.
        result[property].push(content);
    } else {
        // Second value for this key: promote the scalar to an array.
        result[property] = [result[property], content];
    }
}

用這段代碼填充並轉換結果之後,我得到類似下面這樣的 JSON:

type: "video", 
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg", 
video: [ 
"http://www.youtube.com/v/fWNaR-rxAic?version=3&amp;autohide=1", 
"application/x-shockwave-flash", 
"1920", 
"1080" 
] 

但我想要的東西是這樣的:

type: "video", 
image: "http://i3.ytimg.com/vi/fWNaR-rxAic/mqdefault.jpg", 
video: { 
"http://www.youtube.com/v/fWNaR-rxAic?version=3&amp;autohide=1", 
{ 
type:"application/x-shockwave-flash", 
width:"1920", 
height:"1080" 
} 
} 

我試過把判斷改成下面這個 if,但它不起作用:

// Attempted replacement for the value-merging step of the collection loop:
// instead of promoting a repeated key to an array, wrap the existing value in
// an object so sub-properties can be stored by name.
if (utils.isEmpty(result[property])) { 
        // First value seen for this key: store it as-is.
        result[property] = content; 
       } else { 
        if (result[property].push) { 
         // Existing value has a push method (i.e. is array-like): append.
         result[property].push(content); 
        } else { 
         var subresult={}; 
         // NOTE(review): `name` is not defined in any visible snippet; the
         // split result is called `ogname` in the loop - presumably
         // `ogname[2]` was intended. TODO confirm.
         subresult[name[2]]=content; 
         // The previous scalar is kept under the same key as its parent
         // (e.g. video.video), not under a dedicated identifier.
         subresult[property]=result[property] ; 

         // NOTE(review): after this assignment result[property] is a plain
         // object without a push method, so the next sub-property reaches this
         // branch again and wraps the object once more instead of adding to it.
         result[property] = subresult; 
        } 
       } 

我不想循環所有元2次,我不擅長javascript和nodejs函數......任何建議?謝謝

+0

樂意提供幫助,但如果您發佈實際可運行的代碼,則會得到更快的響應。任何想要幫助的人都需要重新構建程序的其餘部分。 –

回答

0

這比較棘手,問題出在你希望的 og:video 呈現方式上。我認爲無法做成你想要的那種結構。最簡單的做法是在與 og:video:width 同一層級分配一個標識符(例如 name),並把 og:video 本身的值(URL)存放在這個標識符下。

結果示例

{ 
    "type": "video.other", 
    "url": "http://philippeharewood.com/facebook/video.html", 
    "title": "Simple Plan", 
    "video": { 
    "name": "http://www.youtube.com/v/Y4MnpzG5Sqc?version=3&amp;autohide=1", 
    "type": "application/x-shockwave-flash", 
    "width": "398", 
    "height": "224", 
    "release_date": "2012-05-29T21:30" 
    } 
} 

可以這樣實現:

var cheerio = require('cheerio');
var request = require('request');

var url = 'http://philippeharewood.com/facebook/video.html';
var result = {};

/**
 * Merge a single Open Graph property into `target`.
 *
 * "og:<group>" is stored as target[group] = content. "og:<group>:<field>"
 * turns target[group] into an object and stores target[group][field] =
 * content; a plain value already stored for the group is kept under the
 * "name" key. The order in which the tags arrive does not matter: a plain
 * "og:<group>" seen after its sub-properties is also stored under "name"
 * instead of overwriting the collected object.
 *
 * Segments after the third one (e.g. "og:a:b:c") are ignored.
 *
 * @param {Object} target   Object that accumulates the parsed properties.
 * @param {string} property Full property name, e.g. "og:video:width".
 * @param {string} content  Value of the tag's content attribute.
 */
function addOpenGraphTag(target, property, content) {
    var og = property.split(':');
    var group = og[1];
    var field = og[2];

    // The page is untrusted input: never let a tag name reach the prototype.
    if (!group || group === '__proto__' || field === '__proto__') {
        return;
    }

    var existing = target[group];
    var isBranch = typeof existing === 'object' && existing !== null;

    if (og.length > 2) {
        if (!isBranch) {
            var set = {};
            // A value stored earlier for the bare group becomes its name.
            if (typeof existing === 'string') {
                set.name = existing;
            }
            target[group] = set;
            existing = set;
        }
        existing[field] = content;
    } else if (isBranch) {
        // Sub-properties arrived first; keep them and add the bare value.
        existing.name = content;
    } else {
        target[group] = content;
    }
}

request(url, function(error, response, body) {
    if (error) {
        // Without a body there is nothing to parse.
        console.error('Failed to fetch ' + url + ': ' + error.message);
        return;
    }

    var $ = cheerio.load(body);
    var meta = $('meta');

    // Iterate by index so only matched elements are visited, not the other
    // own keys of the selection object.
    for (var i = 0; i < meta.length; i++) {
        var attribs = meta[i].attribs;

        if (attribs
            && typeof attribs.property === 'string'
            && attribs.property.indexOf('og:') === 0
            && typeof attribs.content === 'string'
        ) {
            addOpenGraphTag(result, attribs.property, attribs.content);
        }
    }

    console.log(JSON.stringify(result, undefined, 2));

});
+0

正確,提問者想要的數據結構在 JS 中是不可能的,因爲在_對象_中每個屬性都需要一個名稱。可行的做法是將外層的花括號替換爲方括號 - 從而將 video 創建爲帶有兩個條目的_數組_,第一個是包含 URL 的字符串值,第二個是帶有「元數據」的對象。但是我看不出這樣做有任何實際的好處 - 該數據結構的其他部分都是通過對象屬性訪問,唯獨這裏必須改用索引訪問,會相當混亂。 – CBroe

0

這是我的答案。 @phwd完全回答了這個問題,但我認爲能夠制定一個更全面的解決方案,將所有meta標籤解析爲n級別會更好。

var cheerio = require('cheerio'),
    request = require('request'),
    url = 'http://philippeharewood.com/facebook/video.html',
    result = {},
    // Read an attribute from a parsed tag; yields "" when the tag has no
    // attribs or the attribute is missing/empty.
    attr = function(tag, prop){ return tag.attribs && tag.attribs[prop] || ""; };

/**
 * Insert one meta property into a nested object tree of arbitrary depth.
 *
 * The property is split on ':' and every segment but the last becomes a
 * branch object; the last segment receives `content`. When a segment is
 * needed both as a value and as a branch (e.g. "og:video" together with
 * "og:video:width"), the value is stored under the branch's "name" key,
 * regardless of which of the two tags comes first.
 *
 * @param {Object} root     Tree that accumulates the parsed properties.
 * @param {string} property Full property name, e.g. "og:video:width".
 * @param {string} content  Value to store at the leaf.
 */
function insertMeta(root, property, content) {
    var tokens = property.split(':'),
        parent = root;

    // The page is untrusted input: refuse names that would replace a prototype.
    if (tokens.indexOf('__proto__') !== -1) return;

    for (var j = 0; j < tokens.length; j++) {
        var token = tokens[j],
            // Own properties only, so inherited members such as "constructor"
            // are never treated as existing branches.
            current = Object.prototype.hasOwnProperty.call(parent, token)
                ? parent[token]
                : undefined,
            isBranch = typeof current === 'object' && current !== null;

        if (j + 1 === tokens.length) { // leaf node

            // expected leaf is already a branch, so keep the branch and
            // record the value as its name
            if (isBranch) {
                if (content) current.name = content;
            }
            // leaf should take the value given
            else parent[token] = content;
            return;
        }

        // branch node: if no such branch exists, make one
        if (!isBranch) {
            var branch = {};
            // if the branch is already a leaf, move its value to the name attr
            if (typeof current === 'string' && current) branch.name = current;
            parent[token] = branch;
            current = branch;
        }
        parent = current;
    }
}

request(url, function(err, res, body) {

    if (err) {
        // Without a body there is nothing to parse.
        console.error('Failed to fetch ' + url + ': ' + err.message);
        return;
    }

    var metas = cheerio.load(body)('meta');

    // Iterate by index so only matched elements are visited, not the other
    // own keys of the selection object.
    for (var i = 0; i < metas.length; i++) {
        var property = attr(metas[i], 'property');

        if (property) insertMeta(result, property, attr(metas[i], 'content'));
    }

    // Every namespaced property is parsed, but only the "og" subtree is shown.
    console.log(JSON.stringify(result.og, undefined, 2));

});