2016-07-07 67 views
1

我爲這個問題折騰了大半天。簡而言之,我試圖通過 Node.js 模塊配合 PhantomJS 登錄亞馬遜。問題的簡短版本是:亞馬遜給我顯示一條消息,說必須啓用 cookie 才能使用該網站。(主題:PhantomJS 持久化 Cookie 與 Javascript)

這裏是我目前的資源......

NPM's phantom module

Working example of logging into Amazon using PhantomJS

SO question addressing persistent cookies in PhantomJS

Another SO question about cookies set by Javascript

最後這個問題特別有意思,因爲第一個答案談的是用戶代理(我已經試過至少三四次,結果都一樣),而第二個答案指出的情況我認爲可能正是我的問題所在。總之,亞馬遜可能會先通過 Javascript 設置一個測試 cookie,然後檢查該 cookie 是否設置成功,以此判斷用戶是否允許 cookie。我可以確認我的 cookie 文件確實被創建了,而且亞馬遜也已經在文件中寫入了 cookie,但在提交登錄表單時這顯然還不夠,因爲在下一頁我被 cookie 警告擋住了。這讓我相信最後一個問題中的那位用戶是對的——儘管我已設法確保啓用 Javascript,我的頁面上的 Javascript 並沒有被觸發執行。

最後,我的page.render顯示了一條亞馬遜消息,說我需要啓用cookie才能繼續。這裏是我的代碼...

'use strict'; 

/** 
* Module dependencies. 
*/ 
var mongoose = require('mongoose'), 
    phantom = require('phantom'), 
    // Admin = mongoose.model('Admin'), 
    Item = mongoose.model('Item'), 
    config = require('../config/config'); 


/**
 * Logs in to Amazon through PhantomJS and then requests the affiliate
 * orders report, rendering a screenshot right after each step is started.
 *
 * The flow is a list of step functions driven by a 50 ms poller: the next
 * step runs only while no page load is in progress. Once every step has been
 * started, PhantomJS is shut down after a 5 second grace period. A failed
 * page load or a rejected PhantomJS call shuts the run down immediately.
 *
 * Progress and failures are reported on the console only.
 *
 * @returns {undefined}
 */
module.exports.check = function() {
    // Required locally so the function carries its own dependency.
    var path = require('path');

    var loadInProgress = false,
        finished = false,
        testindex = 0,
        poller = null,
        userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
        // path.join supplies the separator that `__dirname + 'cookies.txt'` left out.
        cookiePath = path.join(__dirname, 'cookies.txt'),
        url = 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Faffiliate%2Dprogram.amazon.com%2Fhome',
        // url2 is for order data
        url2 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=orders&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Ccategory%2Cclicks%2Cconversion%2Cseller%2Cdqty%2Cnqty%2Cqty&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX',
        // url3 is for earnings data; no step in this flow opens it yet.
        url3 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=earnings&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Cseller%2Cprice%2Crate%2Cqty%2Crevenue%2Cearnings%2Cdevicetype&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX';

    phantom.create([/* '--debug=true', */ '--ignore-ssl-errors=true', '--ssl-protocol=any', '--web-security=false', '--cookies-file=' + cookiePath]).then(function(ph) {

        // Stops the poller and exits PhantomJS after `delayMs`; safe to call more than once.
        function shutdown(delayMs) {
            if (finished) {
                return;
            }
            finished = true;
            clearInterval(poller);
            setTimeout(function() {
                ph.exit();
            }, delayMs);
        }

        // Rejection handler for calls whose failure makes the rest of the flow pointless.
        function fail(err) {
            console.error('Phantom step failed: ' + (err && err.stack ? err.stack : err));
            shutdown(0);
        }

        return ph.createPage().then(function(page) {

            page.on('onLoadStarted', function() {
                loadInProgress = true;
            });

            page.on('onLoadFinished', function(response) {
                if (response === 'success') {
                    loadInProgress = false;
                } else {
                    console.log('Phantom page failed to load.');
                    // This handler does not clear loadInProgress on failure, so stop
                    // the run here rather than risk polling forever.
                    shutdown(0);
                }
            });

            page.on('onError', function(msg, trace) {
                var msgStack = ['ERROR: ' + msg];
                if (trace && trace.length) {
                    msgStack.push('TRACE:');
                    trace.forEach(function(t) {
                        msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
                    });
                }
                console.error(msgStack.join('\n'));
            });

            page.on('onResourceError', function(resourceError) {
                console.log('= onResourceError()');
                console.log(' - unable to load url: "' + resourceError.url + '"');
                console.log(' - error code: ' + resourceError.errorCode + ', description: ' + resourceError.errorString);
                loadInProgress = false;
            });

            var steps = [
                // Step 1: configure the page and load the initial login page.
                function() {
                    console.log('--- JAVASCRIPT ---');

                    // Mark the page busy synchronously. The settings round-trips
                    // below are asynchronous, and without this the poller could
                    // start step 2 before page.open() has even been issued.
                    loadInProgress = true;

                    page.setting('javascriptEnabled').then(function(val) {
                        console.log('val: ' + val);
                        // The setting key is 'userAgent' (not 'settings.userAgent');
                        // wait for it to be applied before opening the page.
                        return page.setting('userAgent', userAgent);
                    }).then(function() {
                        return page.open(url);
                    }).catch(fail);
                },
                // Step 2: fill in username/password.
                function() {
                    page.evaluate(function() {
                        document.getElementById('ap_email').value = 'XXXX';
                        document.getElementById('ap_password').value = 'XXXX';
                    }).catch(fail);
                },
                // Step 3: submit the login form.
                function() {
                    loadInProgress = true;
                    page.evaluate(function() {
                        document.forms['signIn'].submit();
                    }).catch(fail);
                },
                // Step 4: request the orders report.
                function() {
                    loadInProgress = true;
                    page.open(url2).catch(fail);
                }
            ];

            poller = setInterval(function() {
                if (!loadInProgress && typeof steps[testindex] === 'function') {
                    steps[testindex]();
                    console.log('Test Index: ' + (testindex + 1));
                    // A failed screenshot is logged but does not abort the run.
                    page.render('config/images/step' + (testindex + 1) + '.png').catch(function(err) {
                        console.error('Phantom render failed: ' + err);
                    });
                    testindex++;
                }
                if (typeof steps[testindex] !== 'function') {
                    // Every step has been started; give the last one time to settle.
                    shutdown(5000);
                }
            }, 50);
        }).catch(fail);
    }).catch(function(err) {
        // phantom.create() itself failed, so there is no instance to exit.
        console.error('Unable to start PhantomJS: ' + (err && err.stack ? err.stack : err));
    });
};

運行後得到的輸出如下:

--- JAVASCRIPT --- 
    Test Index: 1 
    val: true 
    Test Index: 2 
    Test Index: 3 
    Test Index: 4 
    = onResourceError() 
     - unable to load url: "https://sentry.amazon.com/SSO/redirect?response_typ 
e=id_token&client_id=affiliate-program.amazon.com%3A443&redirect_uri=https%3A%2F 
%2Faffiliate-program.amazon.com%3A443%2Fhome%2Freports%2Ftable.json%3Fquery%255B 
type%255D%3Dorders%26query%255Bstart_date%255D%3D2016-05-28%26query%255Bend_date 
%255D%3D2016-06-26%26query%255Btag_id%255D%3D189318233%26query%255Bdevice_type%2 
55D%3Dall%26query%255Blast_accessed_row_index%255D%3D0%26query%255Bcolumns%255D% 
3Dtitle%252Casin%252Ccategory%252Cclicks%252Cconversion%252Cseller%252Cdqty%252C 
nqty%252Cqty%26query%255Bskip%255D%3D0%26query%255Bsort%255D%3Dasin%26query%255B 
limit%255D%3D25%26store_id%3XXXX&scope=openid&nonce=5d8a3f10bb3746c799 
a05a927b0204f3c0629d5c8c5646bb49ccdcd93f07247e&sentry_handler_version=TomcatSSOF 
ilter-1.1-1" 
     - error code: 5, description: Operation canceled 
    Phantom page failed to load. 

有誰能指點一下,我可能遺漏了什麼?

回答

1

這似乎是 PhantomJS 2.1.1(NPM 模塊所使用的版本)或 NPM 模塊本身的問題。

我用 Horseman 和 PhantomJS 2.0.0 完全重寫了這個腳本,它立刻就能正常工作了。爲了方便後來者參考,下面是可用的實現。我接觸 Horseman 才一天,就已經喜歡上它的鏈式執行方式——比我用過的任何其他 Phantom 封裝都更乾淨。

'use strict'; 

/** 
* Module dependencies. 
*/ 
var mongoose = require('mongoose'), 
    Horseman = require('node-horseman'), 
    phPath = __dirname + '\\phantomjs-2.0.0-windows\\bin\\phantomjs.exe', 
    Item = mongoose.model('Item'), 
    config = require('../config/config'); 


/**
 * Logs in to Amazon with Horseman and prints the plain-text body of the
 * affiliate orders and earnings reports, saving a screenshot after each step.
 *
 * Any failure in the chain is logged, and the Horseman instance is closed
 * whether or not the chain succeeded.
 *
 * @returns {undefined}
 */
module.exports.updateItems = function() {
    // Required locally so the function carries its own dependency.
    var path = require('path');

    // path.join supplies the separator that `__dirname + 'cookies.txt'` left out.
    var cookiePath = path.join(__dirname, 'cookies.txt'),
        url = 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Faffiliate%2Dprogram.amazon.com%2Fhome',
        // url2 is for order data
        url2 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=orders&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Ccategory%2Cclicks%2Cconversion%2Cseller%2Cdqty%2Cnqty%2Cqty&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX',
        // url3 is for earnings data
        url3 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=earnings&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Cseller%2Cprice%2Crate%2Cqty%2Crevenue%2Cearnings%2Cdevicetype&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX';

    var horseman = new Horseman({
        cookiesFile: cookiePath,
        ignoreSSLErrors: true,
        sslProtocol: 'any',
        webSecurity: false,
        timeout: 15000,
        phantomPath: phPath
    });

    // Prints the text extracted from the current page by plainText().
    function printPageText(txt) {
        console.log('Page results: ');
        console.dir(txt);
    }

    horseman
        .userAgent('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36')
        .authentication('XXXX', 'XXXX')
        .on('consoleMessage', function(msg) {
            console.log(msg);
        })
        .on('error', function(msg, trace) {
            var msgStack = ['ERROR: ' + msg];
            if (trace && trace.length) {
                msgStack.push('TRACE:');
                trace.forEach(function(t) {
                    msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
                });
            }
            console.error(msgStack.join('\n'));
        })
        .open(url)
        .screenshot('config/images/step1.png')
        .waitForSelector('#ap_email')
        .value('#ap_email', 'XXXX')
        .waitForSelector('#ap_password')
        .value('#ap_password', 'XXXX')
        .screenshot('config/images/step2.png')
        .click('#signInSubmit')
        .waitForNextPage()
        .screenshot('config/images/step3.png')
        .open(url2)
        .screenshot('config/images/step4.png')
        .plainText()
        .then(printPageText)
        .open(url3)
        .screenshot('config/images/step5.png')
        .plainText()
        .then(printPageText)
        // Handle any rejected step here so the chain recovers and still reaches
        // close(); otherwise a failure would leave the PhantomJS process running.
        .catch(function(err) {
            console.error('updateItems failed: ' + (err && err.stack ? err.stack : err));
        })
        .close();
};

祝你好運!

+0

有趣!如果流程中必須加入邏輯判斷/條件分支,那麼在 Horseman 的鏈式腳本裏該如何處理? – Vaviloff

+1

@Vaviloff 好問題。Horseman 提供了一個 `do` 函數(https://github.com/johntitus/node-horseman#dofn),它允許你在不打斷鏈條的情況下運行任意函數。據我瞭解,其結果會傳遞給鏈中的下一個函數,所以你可以這樣寫:`.do(function(){return stuff;}).then(function(stuffFromDo){return moreStuff;});`。要我說,這相當優雅。 – aikorei

0

我最近也遇到了同樣的問題,簡單的解決辦法是給新創建的頁面設置用戶代理(user agent)。如果你使用的是 phantomjs-node 模塊,代碼如下。

page.setting("userAgent", "your user agent here");