0
我想前往 this page，並從每個連結擷取每篇論文的「標題」和「作者」。到目前為止，我寫了以下程式碼（我需要協助的問題已寫在程式碼內的註釋中）：需要 CasperJS 抓取方面的協助
// CasperJS helper module; this script uses it for utils.dump().
var utils = require('utils');

// The single Casper instance shared by every step of the script.
var casper = require('casper').create({
    clientScripts: ['lib/jquery.min.js'],
    logLevel: 'error',
    verbose: true,
    pageSettings: {
        userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36',
        loadPlugins: false,
        loadImages: false
    }
});

// Shared state read and written by the functions below:
//   thesis_data - one {title, author} record per visited thesis page
//   links       - URLs of the thesis pages still to be visited
//   i           - index into `links` of the next page to visit
var thesis_data = [];
var links = [];
var i = 0;
/**
 * Collects the `href` attribute of every element matching `selector`.
 *
 * This function is handed to casper.evaluate(), so it runs against the
 * page's `document`. Keep it self-contained: it must not rely on variables
 * declared elsewhere in this script.
 *
 * @param {string} [selector] CSS selector for the thesis links. Supply it as
 *     an extra argument to evaluate(), e.g.
 *     `this.evaluate(getThesisLinks, '<selector>')`.
 *     TODO: the right selector depends on the listing page's markup.
 * @returns {string[]} The href values found, in document order. Empty when no
 *     usable selector was given.
 */
function getThesisLinks(selector) {
    // querySelectorAll('') throws a SyntaxError, so a missing or empty
    // selector is reported as "no links" instead of crashing the page call.
    if (typeof selector !== 'string' || selector === '') {
        return [];
    }
    var anchors = document.querySelectorAll(selector);
    var hrefs = Array.prototype.map.call(anchors, function (anchor) {
        return anchor.getAttribute('href');
    });
    // getAttribute() returns null for elements without an href; drop those
    // so callers never build URLs out of null.
    return hrefs.filter(function (href) {
        return href !== null;
    });
}
/**
 * Completion callback for casper.run(): processes one entry of `links` per
 * invocation and re-arms itself until every link has been handled.
 *
 * Must be called with `this` bound to the Casper instance. Reads and advances
 * the file-level cursor `i`; once the cursor is past the end of `links`, it
 * dumps `thesis_data` and exits.
 */
function loopThroughThesisLinks() {
    // Nothing left to visit: print what was collected and stop.
    if (!(i < links.length)) {
        utils.dump(thesis_data);
        this.exit();
        return;
    }

    var currentLink = links[i];
    this.echo('[LINK #' + i + '] ' + currentLink);
    getThesisData.call(this, currentLink);
    i += 1;
    // Run the steps queued for this link, then come back here for the next one.
    this.run(loopThroughThesisLinks);
}
/**
 * Opens a single thesis page and appends its {title, author} record to the
 * file-level `thesis_data` array.
 *
 * Must be called with `this` bound to the Casper instance.
 *
 * @param {string} link URL of the thesis page to open.
 */
function getThesisData(link) {
    this.start(link, function onThesisPageLoaded() {
        // TODO: both selectors are still empty placeholders. Replace them
        // with the selectors of the elements that hold the thesis title and
        // the author names on the item page.
        var record = {
            title: this.fetchText(''),
            author: this.fetchText('')
        };
        thesis_data.push(record);
    });
}
// Entry point: open the browse listing, collect the thesis links from it,
// then hand control to loopThroughThesisLinks to visit them one by one.
casper.start('http://ses.library.usyd.edu.au/handle/2123/345/browse?type=dateissued&sort_by=2&order=DESC&rpp=1495&etal=0&submit_browse=Update', function() {
    // evaluate() does not return an array when the page-side function fails
    // (e.g. it throws), so fall back to an empty list rather than crashing
    // on `.length` below and in loopThroughThesisLinks.
    var hrefs = this.evaluate(getThesisLinks) || [];

    // Convert relative links to absolute URLs.
    // NOTE(review): this assumes each href is relative to ".../handle/".
    // If the page emits hrefs that already start with "/handle/", the prefix
    // must be adjusted - confirm against the real markup.
    links = hrefs.map(function (href) {
        return "http://ses.library.usyd.edu.au/handle/" + href;
    });
    utils.dump(links);
});
casper.run(loopThroughThesisLinks);
任何援助將不勝感激。
請提供更多細節，例如具體遇到了什麼問題。 – AsTeR