2017-01-30 58 views
1

我正在使用 netsniff.js 的代碼來生成 HAR 文件,我想改進它,以便從一個數組(在下面的代碼中命名爲 links)給出的多個鏈接生成多個 HAR 文件。(標題:使用 PhantomJS 生成多個 HAR 文件)

這裏有另一個問題「Using Multiple page.open in Single Script」,可能對我有幫助,但我不知道如何在我的代碼中實現其中給出的解決方案。

下面是我的代碼(如果 links 陣列包含多個項,輸出會記錄 FAIL to load the address):

"use strict"; 
/**
 * Fallback implementation of Date.prototype.toISOString.
 *
 * Must be invoked with a Date as `this`. The string ends in 'Z' (UTC), so
 * every field is read with the getUTC* accessors; reading the local-time
 * accessors would produce a wrong instant in any non-UTC time zone.
 *
 * @returns {string} Timestamp formatted as YYYY-MM-DDTHH:mm:ss.sssZ.
 */
function toISOStringPolyfill() {
    // Left-pad a number with zeros up to `width` characters.
    function pad(n, width) {
        var text = String(n);
        while (text.length < width) {
            text = '0' + text;
        }
        return text;
    }

    return this.getUTCFullYear() + '-' +
        pad(this.getUTCMonth() + 1, 2) + '-' +
        pad(this.getUTCDate(), 2) + 'T' +
        pad(this.getUTCHours(), 2) + ':' +
        pad(this.getUTCMinutes(), 2) + ':' +
        pad(this.getUTCSeconds(), 2) + '.' +
        pad(this.getUTCMilliseconds(), 3) + 'Z';
}

// Install the fallback only when the runtime does not provide the method.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = toISOStringPolyfill;
}
var entries = []; 
/**
 * Build a HAR log object (version '1.2') describing a single page load.
 *
 * Reads two script-level globals: `phantom` (creator version) and `page`
 * (`page.startTime` / `page.endTime`, used for the page's `onLoad` timing).
 *
 * @param {string} address   Page URL; used as the page id and as the
 *                           `pageref` of every entry.
 * @param {string} title     Page title.
 * @param {Date}   startTime Load start time; must provide toISOString().
 * @param {Array}  resources Records shaped { request, startReply, endReply }.
 *                           May be sparse; records missing any of the three
 *                           parts are skipped.
 * @returns {Object} Object of the form { log: { version, creator, pages, entries } }.
 */
function createHAR(address, title, startTime, resources)
{
    // Local to this call: a shared script-level array would keep the entries
    // of previously processed pages and leak them into this page's HAR.
    var entries = [];

    resources.forEach(function (resource) {
        var request = resource.request,
            startReply = resource.startReply,
            endReply = resource.endReply;

        // Skip requests that never received both a start and an end reply.
        if (!request || !startReply || !endReply) {
            return;
        }

        // Exclude Data URI from HAR file because
        // they aren't included in specification
        if (request.url.match(/(^data:image\/.*)/i)) {
            return;
        }

        entries.push({
            startedDateTime: request.time.toISOString(),
            time: endReply.time - request.time,
            request: {
                method: request.method,
                url: request.url,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: request.headers,
                queryString: [],
                headersSize: -1,
                bodySize: -1
            },
            response: {
                status: endReply.status,
                statusText: endReply.statusText,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: endReply.headers,
                redirectURL: "",
                headersSize: -1,
                bodySize: startReply.bodySize,
                content: {
                    size: startReply.bodySize,
                    mimeType: endReply.contentType
                }
            },
            cache: {},
            timings: {
                blocked: 0,
                dns: -1,
                connect: -1,
                send: 0,
                wait: startReply.time - request.time,
                receive: endReply.time - startReply.time,
                ssl: -1
            },
            pageref: address
        });
    });

    return {
        log: {
            version: '1.2',
            creator: {
                name: "PhantomJS",
                version: phantom.version.major + '.' + phantom.version.minor +
                    '.' + phantom.version.patch
            },
            pages: [{
                startedDateTime: startTime.toISOString(),
                id: address,
                title: title,
                pageTimings: {
                    // Taken from the global `page`, not from the parameters.
                    onLoad: page.endTime - page.startTime
                }
            }],
            entries: entries
        }
    };
}
// Single WebPage instance; processSites() reuses it for every address.
var page = require('webpage').create();
// File-system module, used to write each generated HAR file.
var fs = require('fs');
// Running counter used to build the output file name ('file' + count + '.har').
var count = 0;
/**
 * Load the addresses in `links` one after another and write one HAR file
 * per address ('file0.har', 'file1.har', ...).
 *
 * Addresses are taken with `links.pop()`, so the array is consumed from its
 * END and is emptied as a side effect. The next address is only opened from
 * inside the completion callback of the current one, which keeps the loads
 * sequential. A failed load ends the whole run with exit code 1.
 *
 * Uses the script-level globals `page`, `fs`, `count`, `entries`,
 * `createHAR` and `phantom`.
 *
 * @param {string[]} links Queue of URLs to process (mutated).
 */
function processSites(links)
{
    // Nothing (left) to load: finish cleanly instead of opening `undefined`.
    if (!links || links.length === 0) {
        phantom.exit();
        return;
    }

    var path = 'file' + count + '.har';

    page.address = links.pop();
    page.resources = [];
    console.log("page resources:", page.resources)
    count = count + 1;

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    // Resource records are indexed by the request id.
    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var record = page.resources[res.id];

        // page.resources is reset for every address, so a reply whose request
        // is not recorded here (presumably one still in flight from the
        // previous address) has no record; ignore it instead of throwing.
        if (!record) {
            return;
        }
        if (res.stage === 'start') {
            record.startReply = res;
        }
        if (res.stage === 'end') {
            record.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Report a failed load right away; waiting is only useful on success.
        if (status !== 'success') {
            console.log('FAIL to load the address');
            phantom.exit(1);
            return;
        }

        // Wait 10 s after the load so that late resources are recorded too.
        setTimeout(function () {
            var har;

            page.endTime = new Date();
            page.title = page.evaluate(function () {
                return document.title;
            });
            // Reset the shared accumulator so this HAR does not inherit the
            // entries collected for a previously processed address.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            fs.write(path, JSON.stringify(har), 'w');

            if (links.length > 0) {
                processSites(links);
            } else {
                phantom.exit();
            }
        }, 10000);
    });
}

// Addresses to capture. processSites() takes them with links.pop(), so the
// LAST address in this array is processed first and the array ends up empty.
var links = ["http://stackoverflow.com", "http://marvel.com"]; 

// Start the sequential processing chain.
processSites(links); 

更新:
上面的代碼生成兩個 HAR 文件 file1.har 和 file2.har,但第二個 HAR 文件包含了從兩個鏈接產生的 har 內容,而它應該只包含第一個鏈接的 har 內容……

通過設置 var har = " " 解決了這個問題。

回答

2

你不能在 PhantomJS 中用一個簡單的循環重複打開頁面,因爲 page.open 方法是異步的。它不會等待第一個站點處理完畢,而是馬上打開第二個站點。

我已將您的腳本重寫爲使用遞歸:下一個站點要等當前站點處理完之後纔會打開。(注意:如果隊列中的任何一個站點無法加載,整個過程將停止,但您可以輕鬆地重寫腳本以避免這種情況。)

// Installs a Date.prototype.toISOString fallback only when the runtime lacks
// the method. The function body is elided ("// ...") in this excerpt.
if (!Date.prototype.toISOString) { 
    Date.prototype.toISOString = function() { 
     // ... 
    } 
} 

// Script-level array, declared outside createHAR.
// NOTE(review): if the elided createHAR body appends to this array without
// clearing it, each HAR will also contain earlier pages' entries — confirm.
var entries = []; 

// Builds the HAR object for one page; the body is elided ("// ...") in this
// excerpt.
function createHAR(address, title, startTime, resources) 
{ 
    // ... 
} 

// Single WebPage instance; processSites() reuses it for every address.
var page = require('webpage').create();

/**
 * Load the addresses in `links` one after another and print the HAR of each
 * page to the console as indented JSON.
 *
 * Addresses are taken with `links.pop()`, so the array is consumed from its
 * END and is emptied as a side effect. The next address is only opened from
 * inside the completion callback of the current one, which keeps the loads
 * sequential. A failed load ends the whole run with exit code 1.
 *
 * Uses the script-level globals `page`, `entries`, `createHAR` and `phantom`.
 *
 * @param {string[]} links Queue of URLs to process (mutated).
 */
function processSites(links)
{
    // Nothing (left) to load: finish cleanly instead of opening `undefined`.
    if (!links || links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();

    console.log("PAGE ADDRESS: ", page.address);
    page.resources = [];

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    // Resource records are indexed by the request id.
    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var record = page.resources[res.id];

        // page.resources is reset for every address, so a reply whose request
        // is not recorded here (presumably one still in flight from the
        // previous address) has no record; ignore it instead of throwing.
        if (!record) {
            return;
        }
        if (res.stage === 'start') {
            record.startReply = res;
        }
        if (res.stage === 'end') {
            record.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Report a failed load right away; waiting is only useful on success.
        if (status !== 'success') {
            console.log('FAIL to load the address');
            phantom.exit(1);
            return;
        }

        // Wait 10 s after the load so that late resources are recorded too.
        setTimeout(function () {
            var har;

            page.endTime = new Date();
            page.title = page.evaluate(function () {
                return document.title;
            });
            // Reset the shared accumulator so this page's HAR does not also
            // contain the entries collected for previously processed pages.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            console.log(JSON.stringify(har, undefined, 4));

            if (links.length > 0) {
                processSites(links);
            } else {
                phantom.exit();
            }
        }, 10000);
    });
}

// Addresses to capture. processSites() takes them with links.pop(), so the
// LAST address in this array is processed first and the array ends up empty.
var links = ["http://edition.cnn.com", "http://stackoverflow.com"]; 

// Start the sequential processing chain.
processSites(links); 
+1

我發現 'http://edition.cnn.com' 的 har 文件也會包含爲 'http://stackoverflow.com' 生成的 har 代碼……有沒有一種單獨輸出各自 har 代碼的方法? – Valip

+1

Vaviloff你還能幫助我嗎?我更新了這個問題。謝謝! – Valip

+1

也許 'var entries = [];' 應該放在 'createHar' 函數內?我沒有仔細讀那部分代碼,因爲問題是關於如何迭代鏈接數組,而不是創建 har 文件本身。 – Vaviloff

相關問題