我正在構建一個小網頁報廢器,並且我偶然發現了以下問題:我的應用程序需要刮取網站的不同部分,並將信息放入數據庫。有時它會給出重複的條目等瘋狂的結果,或者從函數getPhoto()返回未定義的結果。但是,如果我只調用該函數(並且不運行腳本的其餘部分),它將返回正確的結果!流星不等待函數的結果,返回undefined
我有一個for循環,通過不同的URL循環。它會轉到每個URL並抓取以下信息:1.標題,2.description,3.內部鏈接,4.調用根據標題生成圖像的函數(getPhoto(...)),5.保存結果到數據庫。一切都發生在服務器(我使用cron作業,沒有客戶端交互)
for (i = 0; i < AllLinks.length; i++) {
if (AllLinks[i] != undefined && AllLinks[i] != null && sepLink[2] == "www.fly4free.pl") {
var t2 = {
travelTitle: null,
travelTitle2: null,
travelTitle3: null,
travelDescription: null,
travelDescription2: null,
travelDescription3: null,
travelBuy: null,
travelBuy2: null,
travelImage: null
};
var TravelLink1 = AllLinks[i];
result = HTTP.get(AllLinks[i], {});
$ = cheerio.load(result.content);
t2.travelTitle = $('.article__title').text();
t2.travelDescription = $('.article__content').find('p').first().text();
if ($("img[src$='//www.fly4free.pl/wp-content/uploads/2016/09/lotJm.png']").parent().attr('href') != null) {
t2.travelBuy = $("img[src$='//www.fly4free.pl/wp-content/uploads/2016/09/lotJm.png']").parent().attr('href'); // Link to buy
}
if (t2.travelBuy) {
if (t2.travelBuy.split('https://').pop().split('http://').pop() != null) {
t2.travelBuy2 = t2.travelBuy.split('https://').pop().split('http://').pop(); // link ready for DB
} else {
t2.travelBuy2 = t2.travelBuy;
}
}
t2.travelTitle3 = convertCurrencyInText(t2.travelTitle, 'PLN');
t2.travelDescription3 = convertCurrencyInText(t2.travelDescription, 'PLN');
translate(t2.travelTitle3, {from: 'pl', to: 'en'}).then(res => {
t2.travelTitle2 = res.text; // title for DB
if (t2.travelTitle2) { t2.travelImage = getPhoto(t2.travelTitle2); }
translate(t2.travelDescription3, {from: 'pl', to: 'en'}).then(response => {
t2.travelDescription2 = response.text; // description for DB
if (t2.travelDescription2 != null && t2.travelTitle2 != null && t2.travelBuy2 != null && TravelLink1 != null && t2.travelImage != null) {
Links.insert({ title: t2.travelTitle2, description:t2.travelDescription2, image: t2.travelImage, buyLink:t2.travelBuy2, link: TravelLink1, datetime: new Date() });
}
}).catch(err => {
console.error(err);
});
}).catch(err => {
console.error(err);
});
}
}
「AllLinks」包含不同的URL上。我有這個刮URL問題:http://www.fly4free.pl/na-wakacje-do-toskanii-tanie-loty-do-pizy-z-gdanska-za-170-pln/
getPhoto()函數
function getPhoto(title) {
var travelPlace = nlp(title).match('to *').out('text').replace('to','').trim();
if (travelPlace) {var travelPlace2 = travelPlace.split(' '); }
if (travelPlace2) {var travelPlace3 = travelPlace2[0] + "+" + travelPlace2[1]; }
if (travelPlace3) {
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q="+travelPlace3+"&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
} else if (images.data.totalHits == 0) {
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q="+travelPlace2[0]+"&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
}
}
} else if (nlp(title).places().data().length > 0) {
var result = nlp(title).places().data()[0].text.replace(/[^a-zA-Z ]/g, "").trim();
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q="+result+"&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
}
} else {
var title2 = title.replace(/[^a-zA-Z ]/g, "").split(" ");
if (title2) {
for(i = 0; i < title2.length; i++) {
if (cities[title2[i]] == 1) {
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q="+title2[i]+"&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
}
} else {
var URL = "https://pixabay.com/api/?key="+API_KEY+"&q=travel&category=travel&orientation=horizontal";
var images = (HTTP.get(URL, {}));
if (images.data.totalHits > 0) {
var imageLink = images.data.hits[0].webformatURL;
return imageLink;
}
}
}
}
}
}
我盡力勸慰記錄結果 - 有時我從getPhoto()得到一個正確的圖像,但是從t2.travelBuy未定義鏈接,有時反之亦然。你能告訴我我做錯了什麼嗎?我看到一些人正在使用承諾或異步/等待這類問題的功能。你認爲這對我有幫助嗎?我應該如何更改我的代碼才能在沒有「未定義」的情況下刮掉網站?
「翻譯」來自「谷歌翻譯的API」包
沒有讀你的整個事情是誠實的,但簡短的答案是:這聽起來像一個異步問題。或者返回一個承諾,或者將您的函數在設置數據的函數的回調函數中獲得不正確的數據。 – dlsso