我正在寫一個內容刮板,用於刮取特定網站上襯衫的信息。我已經在Node中使用NPM軟件包設置了一切,以便創建CSV文件。我遇到的問題是,很多人都知道,Node本質上是異步的。我試圖編寫的CSV文件是在我創建的JSON對象完成創建之前編寫的(迭代每個循環來構建它),因此它傳遞給json2csv(npm包)的'fields'參數。但它將我的數據作爲空對象傳遞。任何人都可以告訴我如何告訴節點,直到我的json對象生成之前,試圖使用fs.writefile來創建CSV文件?謝謝四處逛逛Node的異步性
'use strict';
//require NPM packages
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var json2csv = require('json2csv');
//Array for shirts JSON object for json2csv to write.
var ShirtProps = [];
var homeURL = "http://www.shirts4mike.com/";
//start the scraper
scraper();
//Initial scrape of the shirts link from the home page
function scraper() {
//use the datafolderexists function to check if data is a directory
if (!DataFolderExists('data')) {
fs.mkdir('data');
}
//initial request of the home url + the shirts.php link
request(homeURL + "shirts.php", function (error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
//scrape each of the links for its html data
$('ul.products li').each(function(i, element){
var ShirtURL = $(this).find('a').attr('href');
console.log(ShirtURL);
//pass in each shirtURL data to be scraped to add it to an object
ShirtHTMLScraper(ShirtURL);
});
FileWrite();
// end first request
} else {
console.error(error);
}
});
}
//create function to write the CSV file.
function FileWrite() {
var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Time'];
var csv = json2csv({data: ShirtProps, fields: fields});
console.log(csv);
var d = new Date();
var month = d.getMonth()+1;
var day = d.getDate();
var output = d.getFullYear() + '-' +
((''+month).length<2 ? '0' : '') + month + '-' +
((''+day).length<2 ? '0' : '') + day;
fs.writeFile('./data/' + output + '.csv', csv, function (error) {
if (error) throw error;
});
}
//function to scrape each of the shirt links and create a shirtdata object for each.
function ShirtHTMLScraper(ShirtURL) {
request(homeURL + ShirtURL, function (error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
var time = new Date().toJSON().substring(0,19).replace('T',' ');
//json array for json2csv
var ShirtData = {
title: $('title').html(),
price: $(".price").html(),
imgURL: $('img').attr('src'),
url: homeURL + ShirtURL,
time: time.toString()
};
//push the shirt data scraped into the shirtprops array
ShirtProps.push(ShirtData);
console.log(ShirtProps);
// //set the feilds in order for the CSV file
// var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Time'];
// //use json2csv to write the file -
// var csv = json2csv({data: ShirtProps, fields: fields});
// console.log(csv);
// //date for the filesystem to save the scrape with today's date.
// var d = new Date();
// var month = d.getMonth()+1;
// var day = d.getDate();
// var output = d.getFullYear() + '-' +
// ((''+month).length<2 ? '0' : '') + month + '-' +
// ((''+day).length<2 ? '0' : '') + day;
// //use filesystem to write the file, or overrite if it exists.
// fs.writeFile('./data/' + output + '.csv', csv, function (error) {
// if (error) throw error;
// }); //end writeFile
} else {
console.error(error);
}
});
}
//Check if data folder exists, source: http://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
function DataFolderExists(folder) {
try {
// Query the entry
var DataFolder = fs.lstatSync(folder);
// Is it a directory?
if (DataFolder.isDirectory()) {
return true;
} else {
return false;
}
} //end try
catch (error) {
console.error(error);
}
}
擁抱正確的javascript技術的異步性質,而不是與他們戰鬥 –