0
我想用casperjs來刮臉Facebook的主頁面。我只是修改了示例代碼,以便通過casperjs來刮取Google:使用class = fbxWelcomeBoxName標識元素,並從中檢索鏈接。當使用Casperjs刮臉Facebook時什麼也不要做
他們的示例適用於Google.com,但在Facebook上搜索時似乎無效。我聽說Facebook禁止吸引顧客。但是我的刮臉與此無關。我錯過了什麼嗎?提前致謝!
var links = [];
var casper = require('casper').create();
function getLinks() {
var links = document.querySelectorAll('.fbxWelcomeBoxName');
return Array.prototype.map.call(links, function(e) {
return e.getAttribute("href");
});
}
casper.userAgent('Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1');
casper.start('https://www.facebook.com', function() {
// search for 'casperjs' from facebook form
this.fill('form#login_form', {
email: '***your email***',
pass: '***your password***'
}, true);
});
casper.then(function() {
// aggregate results
links = this.evaluate(getLinks);
});
casper.run(function() {
// echo results in some pretty fashion
this.echo(links.length + ' links found:');
this.echo(' - ' + links.join('\n - ')).exit();
});
結果:
[email protected]:~/tests/casperjs$ casperjs --verbose --log-level=debug --ssl-protocol=any test11.js
[info] [phantom] Starting...
[info] [phantom] Running suite: 3 steps
[debug] [phantom] opening url: https://www.facebook.com/, HTTP GET
[debug] [phantom] Navigation requested: url=https://www.facebook.com/, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "https://www.facebook.com/"
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step anonymous 2/3 https://www.facebook.com/ (HTTP 200)
[info] [remote] attempting to fetch form element from selector: 'form#login_form'
[debug] [remote] Set "email" field value to [email protected]
[debug] [remote] Set "pass" field value to ********
[info] [remote] submitting form to https://www.facebook.com/login.php?login_attempt=1, HTTP POST
[info] [phantom] Step anonymous 2/3: done in 447ms.
[debug] [phantom] Navigation requested: url=https://www.facebook.com/login.php?login_attempt=1, type=FormSubmitted, willNavigate=true, isMainFrame=true
[debug] [phantom] Navigation requested: url=https://www.facebook.com/, type=FormSubmitted, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "https://www.facebook.com/"
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step anonymous 3/3 https://www.facebook.com/ (HTTP 200)
[info] [phantom] Step anonymous 3/3: done in 2265ms.
[info] [phantom] Done 3 steps in 2284ms
0 links found:
-
感謝您的回覆。我嘗試了你所有的三條評論。但它似乎無法檢索元素。 [錯誤] [幻像]等待超時10000ms過期,退出。 10000毫秒等待超時,退出。 Facebook禁止通過幻影進行搜索嗎?像阻止請求,如果它檢測到HTTP請求中的瀏覽器信息爲'phantomjs'。 – 2014-12-08 04:38:56