mirror of
https://github.com/davidjohnbarton/crawler-google-places.git
synced 2025-12-12 16:38:45 +00:00
Added console logs for crawler info
This commit is contained in:
parent
642d83facc
commit
ef9aa23bce
20
src/main.js
20
src/main.js
|
|
@ -125,7 +125,6 @@ const enqueueAllUrlsFromPagination = async (page, requestQueue) => {
|
||||||
const detailLinks = [];
|
const detailLinks = [];
|
||||||
let results = await page.$$('.section-result');
|
let results = await page.$$('.section-result');
|
||||||
const resultsCount = results.length;
|
const resultsCount = results.length;
|
||||||
console.log('Titles ', results.length);
|
|
||||||
for (let resultIndex = 0; resultIndex < resultsCount; resultIndex++) {
|
for (let resultIndex = 0; resultIndex < resultsCount; resultIndex++) {
|
||||||
// Need to get the results again; Puppeteer lost the element context.
|
// Need to get the results again; Puppeteer lost the element context.
|
||||||
results = await page.$$('.section-result');
|
results = await page.$$('.section-result');
|
||||||
|
|
@ -133,7 +132,6 @@ const enqueueAllUrlsFromPagination = async (page, requestQueue) => {
|
||||||
await link.click();
|
await link.click();
|
||||||
await page.waitForSelector('.section-back-to-list-button');
|
await page.waitForSelector('.section-back-to-list-button');
|
||||||
const url = page.url();
|
const url = page.url();
|
||||||
console.log(url);
|
|
||||||
await requestQueue.addRequest({ url, userData: { label: 'detail' } });
|
await requestQueue.addRequest({ url, userData: { label: 'detail' } });
|
||||||
await page.click('.section-back-to-list-button');
|
await page.click('.section-back-to-list-button');
|
||||||
await sleep(5000);
|
await sleep(5000);
|
||||||
|
|
@ -144,7 +142,9 @@ const enqueueAllUrlsFromPagination = async (page, requestQueue) => {
|
||||||
Apify.main(async () => {
|
Apify.main(async () => {
|
||||||
const { searchString } = await Apify.getValue('INPUT');
|
const { searchString } = await Apify.getValue('INPUT');
|
||||||
|
|
||||||
if (!searchString) throw new Error('Attribute searchString missing in input.')
|
if (!searchString) throw new Error('Attribute searchString missing in input.');
|
||||||
|
|
||||||
|
console.log('Scraping Google Places for search string: ', searchString);
|
||||||
|
|
||||||
const requestQueue = await Apify.openRequestQueue();
|
const requestQueue = await Apify.openRequestQueue();
|
||||||
await requestQueue.addRequest({ url: 'https://www.google.com/maps/search/', userData: { label: 'startUrl' } });
|
await requestQueue.addRequest({ url: 'https://www.google.com/maps/search/', userData: { label: 'startUrl' } });
|
||||||
|
|
@ -160,19 +160,22 @@ Apify.main(async () => {
|
||||||
handlePageTimeoutSecs: 1200,
|
handlePageTimeoutSecs: 1200,
|
||||||
handlePageFunction: async ({ request, page }) => {
|
handlePageFunction: async ({ request, page }) => {
|
||||||
const { label } = request.userData;
|
const { label } = request.userData;
|
||||||
|
console.log(`Open ${request.url} with label: ${label}`);
|
||||||
|
|
||||||
if (label === 'startUrl') {
|
if (label === 'startUrl') {
|
||||||
// Enqueue all urls for place detail
|
// Enqueue all urls for place detail
|
||||||
await page.type('#searchboxinput', 'Česká Spořitelna');
|
await page.type('#searchboxinput', searchString);
|
||||||
await sleep(5000);
|
await sleep(5000);
|
||||||
await page.click('#searchbox-searchbutton');
|
await page.click('#searchbox-searchbutton');
|
||||||
await sleep(5000);
|
await sleep(5000);
|
||||||
while(true) {
|
while(true) {
|
||||||
|
const paginationText = await page.$eval('.section-pagination-right', el => el.innerText);
|
||||||
|
console.log(`Added links from pagination: ${paginationText}`);
|
||||||
await page.waitForSelector('#section-pagination-button-next', { timeout: DEFAULT_TIMEOUT });
|
await page.waitForSelector('#section-pagination-button-next', { timeout: DEFAULT_TIMEOUT });
|
||||||
await enqueueAllUrlsFromPagination(page, requestQueue);
|
await enqueueAllUrlsFromPagination(page, requestQueue);
|
||||||
const nextButton = await page.$('#section-pagination-button-next');
|
const nextButton = await page.$('#section-pagination-button-next');
|
||||||
const isNextPagination = (await nextButton.getProperty('disabled') === 'true');
|
const isNextPaginationDisabled = (await nextButton.getProperty('disabled') === 'true');
|
||||||
console.log('isNextPagination ', isNextPagination);
|
if (isNextPaginationDisabled) {
|
||||||
if (isNextPagination) {
|
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
await nextButton.click();
|
await nextButton.click();
|
||||||
|
|
@ -195,7 +198,7 @@ Apify.main(async () => {
|
||||||
});
|
});
|
||||||
placeDetail.url = request.url;
|
placeDetail.url = request.url;
|
||||||
placeDetail.reviews = [];
|
placeDetail.reviews = [];
|
||||||
console.log(placeDetail)
|
console.log(placeDetail);
|
||||||
// Get all reviews
|
// Get all reviews
|
||||||
await page.click('button.section-reviewchart-numreviews');
|
await page.click('button.section-reviewchart-numreviews');
|
||||||
await infiniteScroll(page, 99999999999);
|
await infiniteScroll(page, 99999999999);
|
||||||
|
|
@ -221,6 +224,7 @@ Apify.main(async () => {
|
||||||
}
|
}
|
||||||
await Apify.pushData(placeDetail);
|
await Apify.pushData(placeDetail);
|
||||||
}
|
}
|
||||||
|
console.log('Done ', request.url);
|
||||||
},
|
},
|
||||||
maxConcurrency: 1,
|
maxConcurrency: 1,
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user