2016-12-30 74 views
0

我正在運行一個需要某些亞馬遜書籍的預覽內容(它們可以是PNG圖像或html內容)的小項目。CasperJS不適用於亞馬遜iframe

例如,這本書:https://www.amazon.com/gp/product/B00JNYEXCK/

當點擊「Look inside」徽章(id="sitbLogoImg" 的 img 標籤)時,會出現一個新的框架,顯示本書的預覽內容。預覽有兩種版本:印刷版預覽(PNG 圖像,這些我已經可以抓取)和 Kindle 預覽(內容位於 iframe 文檔中)。

我卡在 Kindle 預覽所用的 iframe 上,它的結構基本上是這樣的:

<!-- Simplified sketch of the Kindle preview markup: the sample text lives in
     the document nested inside iframe#sitbReaderFrame. -->
<div id="scrollElm-0" class="pageHtml">
    <div id="sitbReaderKindleSample">
        <iframe id="sitbReaderFrame">
            <html>
                <head></head>
                <body>
                    <p>.......</p>
                    <div>......</div>
                    ....
                </body>
            </html>
        </iframe>
    </div>
</div>

這裏是我的CasperJS腳本:

// PhantomJS file-system module; used further down to read and write files.
var fs = require('fs');

// Create the Casper instance with every option supplied up front:
// plugins disabled, a desktop Chrome user agent, a 1366x768 viewport and
// a 10000 ms default timeout for the wait* steps.
var casper = require('casper').create({
    pageSettings: {
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
    },
    viewportSize: {
        width: 1366,
        height: 768
    },
    waitTimeout: 10000
});
 

 
// Reuse cookies saved by a previous run, if there are any.
var cookieFilename = "cookies.txt";

// fs.read throws when the file cannot be opened, which is the case on the
// very first run (the file is only created at the end of this script), so
// check for it before reading.
if (fs.exists(cookieFilename)) {
    var data = fs.read(cookieFilename);
    if (data) {
        try {
            phantom.cookies = JSON.parse(data);
        } catch (e) {
            // A corrupt cookie file should not abort the run; continue without cookies.
            console.log('Ignoring unreadable cookie file ' + cookieFilename + ': ' + e);
        }
    }
}
 

 
// Receives the markup read from the preview frame; a later step saves it to disk.
var lis_content = '';

// Queue the navigation steps as a single chain (each Casper call returns the
// instance, so the order of queued steps is unchanged).
casper
    .start('https://www.amazon.com/gp/product/B00JNYEXCK/', function () {
        this.echo(this.status(true));
        this.captureSelector('before.png', 'html');
    })
    // Wait for the "Look inside" badge before trying to click it.
    .waitForSelector('img#sitbLogoImg', function () {})
    .then(function () {
        this.click('img#sitbLogoImg');
    })
    // Presumably the header of the preview lightbox; its presence is used as
    // the signal that the overlay has opened.
    .waitForSelector('div#sitbLBHeader', function () {})
    // Fixed 3 s pause, then a screenshot of the whole document.
    .wait(3000, function () {
        this.captureSelector('after.png', 'html');
    })
    // Switch into the frame with index 1, keep its HTML and take a screenshot.
    .withFrame(1, function () {
        lis_content = this.getHTML();
        this.captureSelector('lis_content.png', 'html');
    });
 

 
// Write the markup captured from sitbReaderFrame to a file.
casper.then(function () {
    var lis_content_filename = 'lis_content.html';
    // The third argument of fs.write is an open-mode string ('w' = overwrite),
    // not a Unix permission number such as 644.
    fs.write(lis_content_filename, lis_content, 'w');
});
 

 
// Save the session cookies so the next run can reuse them.
casper.wait(1000, function () {
    var cookies = JSON.stringify(phantom.cookies);
    // The third argument of fs.write is an open-mode string ('w' = overwrite),
    // not a Unix permission number such as 644.
    fs.write(cookieFilename, cookies, 'w');
});

// Start executing the queued steps.
casper.run();

問題是這個 iframe 只有 id="sitbReaderFrame",沒有 name 屬性。我試過用 casper.withFrame 搭配 0 到 4 的幀索引,但在 CasperJS 看來這個 iframe 似乎並不存在。

我想聽聽你的任何建議,因爲我真的被困在這裏。非常感謝你,對我英語不好的話感到抱歉。

回答

1

CasperJS腳本:

/**
 * Handler for Casper's 'page.initialized' event. Sets the viewport size and,
 * inside the page, overrides the screen/window dimensions and replaces
 * window.navigator with an object describing desktop Chrome 56 on Linux.
 *
 * @param {Object} page  Page object exposing a writable `viewportSize`
 *                       property and an `evaluate(fn, ...args)` method.
 */
function on_init(page) {
    // Numbers rather than strings: these values end up in screen.width,
    // innerWidth, etc., which page scripts expect to be numeric.
    var width = 1600, height = 900;

    page.viewportSize = {width: width, height: height};

    // The callback is executed in the page context, so everything it needs
    // must be declared inside it or passed in as an argument.
    page.evaluate(function (width, height) {
        // Single plugin description shared by `plugins` and `mimeTypes`.
        var flashPlugin = {
            name: 'Shockwave Flash',
            filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so',
            description: 'Shockwave Flash 11.2 r202',
            version: '11.2.202.440'
        };

        screen = {width: width, height: height, availWidth: width, availHeight: height};
        innerWidth = width;
        innerHeight = height;
        outerWidth = width;
        outerHeight = height;

        window.navigator = {
            plugins: {length: 2, 'Shockwave Flash': flashPlugin},
            mimeTypes: {
                length: 2,
                "application/x-shockwave-flash": {
                    description: "Shockwave Flash",
                    suffixes: "swf",
                    type: "application/x-shockwave-flash",
                    enabledPlugin: flashPlugin
                }
            },
            appCodeName: "Mozilla",
            appName: "Netscape",
            appVersion: "5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            // Booleans, an array and a string below mirror the types a real
            // browser reports for these properties.
            cookieEnabled: true,
            languages: ["en-US", "en"],
            language: "en",
            onLine: true,
            doNotTrack: null,
            platform: "Linux x86_64",
            product: "Gecko",
            vendor: "Google Inc.",
            vendorSub: "",
            productSub: "20030107",
            userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            geolocation: {
                getCurrentPosition: function getCurrentPosition() {},
                watchPosition: function watchPosition() {},
                clearWatch: function clearWatch() {}
            },
            javaEnabled: function javaEnabled() { return false; }
        };
    }, width, height);
}
 

 
// PhantomJS file-system module, used to save the frame markup.
var fs = require('fs');

// Casper instance with debug logging and a 5000 ms default wait timeout.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    waitTimeout: 5000,
    // Casper takes the user agent from pageSettings; a top-level `userAgent`
    // key is not a Casper option and would leave the default agent in place.
    pageSettings: {
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36'
    }
});
 

 
// Wire up logging and the page-initialisation hook, then open the product
// page, click the "Look inside" badge and dump the Kindle preview frame.
casper
    .on("error", function (msg) { this.echo("error: " + msg, "ERROR"); })
    .on("page.error", function (msg, trace) { this.echo("Page Error: " + msg, "ERROR"); })
    .on("remote.message", function (msg) { this.echo("Info: " + msg, "INFO"); })
    .on('page.initialized', on_init)
    .start("https://www.amazon.com/gp/product/B00JNYEXCK/", function () {
        this.click('#ebooksSitbLogoImg');
        this
            .capture('lis.png')
            .wait(3000, function () {
                // Find the preview iframe by id, since it has no name attribute.
                var index = this.evaluate(function () {
                    var frames = document.querySelectorAll('iframe');
                    var found = null;
                    var i;
                    for (i = 0; i < frames.length; i++) {
                        if (frames[i].id === "sitbReaderFrame") {
                            // NOTE(review): the DOM position is offset by one here,
                            // presumably to match the frame numbering withFrame sees
                            // on this page; verify against the live page.
                            found = i + 1;
                        }
                    }
                    return found;
                });

                this
                    .echo("The index is: " + index, "INFO")
                    .capture('lis_content.png');

                // Without a numeric index there is no frame to switch into.
                if (typeof index !== 'number') {
                    this.echo("iframe#sitbReaderFrame not found; lis_content.html not written", "ERROR");
                    return;
                }

                this.withFrame(index, function () {
                    // The third argument of fs.write is an open-mode string
                    // ('w' = overwrite), not a Unix permission number.
                    fs.write('lis_content.html', this.getHTML(), 'w');
                });
            });
    })
    .run();
You need to use the --cookies-file option, to avoid blocking.

./casperjs --cookies-file=./cookies_1.txt casis.js >/dev/stdout

如果輸出以下內容:

error: CasperError: Cannot dispatch mousedown event on nonexistent selector: #ebooksSitbLogoImg

那就表示請求仍然被封鎖,單靠 cookies 無法避免。

In that case
Try again after reconnecting to the internet and getting new IP address.
+0

可能有用:[使用 JavaScript 滾動 iframe](http://stackoverflow.com/questions/1192228/scrolling-an-iframe-with-javascript) | [CasperJS scrollTo](http://docs.casperjs.org/en/latest/modules/casper.html#scrollto) – 2016-12-30 15:08:51