以下腳本的 links 數組中包含一些URL。函數 gatherLinks() 用於從 links 數組中各URL的 sitemap.xml 收集更多URL。一旦 links 數組有足夠的URL(由變量 limit 決定),就會調用函數 request():它對 links 數組中的每個URL向服務器發送請求,獲取響應,並使用 page.render() 函數保存圖像。
PhantomJS 2.0.0不會等待頁面加載
問題是,當我使用PhantomJS 2.0.0運行它時,許多圖像缺少大量內容,也就是說PhantomJS可能沒有等待所有內容加載完成。但是當我使用PhantomJS 1.9.8時,所有內容都能正常加載。可能是什麼原因?
// PhantomJS built-in modules.
var webpage = require('webpage');
var system = require('system');
var fs = require('fs');

// Seed site URLs. gatherLinks() appends "/sitemap.xml" to each entry and
// pushes the <loc> URLs it finds back onto this same array.
var links = [
    "http://somesite.com",
    "http://someothersite.com"
    // ... add further site URLs here ...
];

// Position in `links` currently being processed (shared by both phases).
var index = 0;
// Number of page loads that did not report "success".
var fail = 0;
// Maximum number of URLs to collect before the request phase starts.
var limit = 20;
// Script start timestamp, used for the final elapsed-time report.
var finalTime = Date.now();
/**
 * Loads `<link>/sitemap.xml`, appends the text of every <loc> element to the
 * global `links` array (up to `limit` entries), and then either recurses into
 * the next entry of `links` or starts the request phase via request().
 *
 * Reads and writes the globals `links`, `index` and `fail`; reads `limit`.
 *
 * @param {string} link Base URL of the site whose sitemap should be fetched.
 */
var gatherLinks = function(link){
    var page = webpage.create();
    var sitemapUrl = link + "/sitemap.xml";
    console.log("Fetching links from " + sitemapUrl);
    page.open(sitemapUrl, function(status){
        if(status != "success"){
            // A failed sitemap must not stall the whole run: record it and
            // fall through so the next link (or the request phase) still runs.
            console.log("Sitemap Request FAILED, status: " + status);
            fail++;
        } else {
            // Keep parser state local instead of leaking implicit globals.
            var xmlDoc = new DOMParser().parseFromString(page.content, 'text/xml');
            var loc = xmlDoc.getElementsByTagName('loc');
            for(var i = 0; i < loc.length && links.length < limit; i++){
                links.push(loc[i].textContent);
            }
        }

        // The page is no longer needed on any path below.
        page.close();

        if(links.length >= limit){
            console.log(links.length + " Links prepared. Starting requests.\n");
            index = 0;
            request();
            return;
        }

        // No further entry to fetch a sitemap from: checking index + 1 avoids
        // recursing with links[links.length], which is undefined.
        if(index + 1 >= links.length){
            index = 0;
            console.log(links.length + " Links prepared\n\n");
            request();
            return;
        }

        gatherLinks(links[++index]);
    });
};
/**
 * Opens links[index] in a fresh page, logs the load time, renders the page to
 * "img_200_<index>.jpeg", and then moves on to the next link. After the last
 * link it prints the summary and asks PhantomJS to exit.
 *
 * Reads the globals `links` and `finalTime`; reads and writes `index` and `fail`.
 */
var request = function(){
    // Local timestamp so the load-time measurement does not leak a global.
    var startTime = Date.now();
    var page = webpage.create();
    page.open(links[index], function(status) {
        console.log('Loading link #' + (index + 1) + ': ' + links[index]);
        console.log("Time taken: " + (Date.now() - startTime) + " msecs");
        if(status != "success"){
            console.log("Request FAILED, status: " + status);
            fail++;
        }
        page.render("img_200_" + index + ".jpeg", {format: 'jpeg', quality: '100'});

        // Close exactly once, before deciding whether to continue.
        page.close();

        if(index >= links.length-1){
            console.log("\n\nAll links done, final time taken: " + (Date.now() - finalTime) + " msecs");
            console.log("Requests sent: " + links.length + ", Failures: " + fail);
            console.log("Success ratio: " + ((links.length - fail)/links.length)*100 + "%");
            phantom.exit();
            // Return so that no further request is started (and no undefined
            // URL is opened) once exit has been requested.
            return;
        }

        index++;
        request();
    });
};
// Entry point: start collecting URLs from the sitemap of the first seed link.
gatherLinks(links[0]);
我在讓PhantomJS和CasperJS等待整個頁面加載完成時也遇到了很多麻煩。我嘗試遵循這個建議:http://stackoverflow.com/a/27472788/470749 – Ryan