Mirror of https://github.com/davidjohnbarton/crawler-google-places.git (synced 2025-12-12 16:38:45 +00:00)
Added searchViewport to input
This commit is contained in:
parent ef9aa23bce
commit 6769de702e
1 changed file: src/main.js (22 changed lines)
|
|
@ -140,14 +140,25 @@ const enqueueAllUrlsFromPagination = async (page, requestQueue) => {
|
||||||
};
|
};
|
||||||
|
|
||||||
Apify.main(async () => {
|
Apify.main(async () => {
|
||||||
const { searchString } = await Apify.getValue('INPUT');
|
const { searchString, searchViewport } = await Apify.getValue('INPUT');
|
||||||
|
|
||||||
if (!searchString) throw new Error('Attribute searchString missing in input.');
|
if (!searchString) throw new Error('Attribute searchString missing in input.');
|
||||||
|
|
||||||
console.log('Scraping Google Places for search string: ', searchString);
|
console.log('Scraping Google Places for search string:', searchString);
|
||||||
|
|
||||||
|
let startUrl;
|
||||||
|
if (searchViewport) {
|
||||||
|
const { lat, lng, zoom = 10 } = searchViewport
|
||||||
|
if (!lat || !lng) throw new Error('You have to defined lat and lng for searchViewport!');
|
||||||
|
startUrl = `https://www.google.com/maps/@${lat},${lng},${zoom}z/search`;
|
||||||
|
} else {
|
||||||
|
startUrl = 'https://www.google.com/maps/search/';
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('Start url is ', startUrl);
|
||||||
|
|
||||||
const requestQueue = await Apify.openRequestQueue();
|
const requestQueue = await Apify.openRequestQueue();
|
||||||
await requestQueue.addRequest({ url: 'https://www.google.com/maps/search/', userData: { label: 'startUrl' } });
|
await requestQueue.addRequest({ url: startUrl, userData: { label: 'startUrl' } });
|
||||||
|
|
||||||
const crawler = new Apify.PuppeteerCrawler({
|
const crawler = new Apify.PuppeteerCrawler({
|
||||||
launchPuppeteerOptions: {
|
launchPuppeteerOptions: {
|
||||||
|
|
@ -157,7 +168,7 @@ Apify.main(async () => {
|
||||||
liveView: Apify.isAtHome(),
|
liveView: Apify.isAtHome(),
|
||||||
},
|
},
|
||||||
requestQueue,
|
requestQueue,
|
||||||
handlePageTimeoutSecs: 1200,
|
handlePageTimeoutSecs: 1800, // We are adding all links to queue on startUrl
|
||||||
handlePageFunction: async ({ request, page }) => {
|
handlePageFunction: async ({ request, page }) => {
|
||||||
const { label } = request.userData;
|
const { label } = request.userData;
|
||||||
console.log(`Open ${request.url} with label: ${label}`);
|
console.log(`Open ${request.url} with label: ${label}`);
|
||||||
|
|
@ -214,7 +225,7 @@ Apify.main(async () => {
|
||||||
return {
|
return {
|
||||||
name: $review.find('.section-review-title').text().trim(),
|
name: $review.find('.section-review-title').text().trim(),
|
||||||
text: $review.find('.section-review-text').text(),
|
text: $review.find('.section-review-text').text(),
|
||||||
stars: $review.find('.section-review-stars').attr('aria-label'),
|
stars: $review.find('.section-review-stars').attr('aria-label').trim(),
|
||||||
publishAt: $review.find('.section-review-publish-date').text().trim(),
|
publishAt: $review.find('.section-review-publish-date').text().trim(),
|
||||||
likesCount: $review.find('.section-review-thumbs-up-count').text().trim(),
|
likesCount: $review.find('.section-review-thumbs-up-count').text().trim(),
|
||||||
};
|
};
|
||||||
|
|
@ -226,7 +237,6 @@ Apify.main(async () => {
|
||||||
}
|
}
|
||||||
console.log('Done ', request.url);
|
console.log('Done ', request.url);
|
||||||
},
|
},
|
||||||
maxConcurrency: 1,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
await crawler.run();
|
await crawler.run();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user