From 5d824b5b2841c01b36e872b130d5a9faa2bbd0b9 Mon Sep 17 00:00:00 2001 From: JakubDrobnik Date: Fri, 23 Nov 2018 12:23:59 +0100 Subject: [PATCH] Omit owner responses from review text --- src/places_crawler.js | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/places_crawler.js b/src/places_crawler.js index c276894..52bd491 100644 --- a/src/places_crawler.js +++ b/src/places_crawler.js @@ -49,33 +49,34 @@ const setUpCrawler = (launchPuppeteerOptions, requestQueue) => { // Get all reviews await page.click('button.section-reviewchart-numreviews'); await page.waitForSelector('.section-star-display', { timeout: DEFAULT_TIMEOUT }); - await sleep(2000); + await sleep(5000); // Sort reviews by newest - try { - await page.waitForSelector('.section-tab-info-stats-button-flex', { timeout: DEFAULT_TIMEOUT }); - await page.click('.section-tab-info-stats-button-flex'); - await page.waitForSelector('.context-menu-entry[data-index="1"]', { timeout: DEFAULT_TIMEOUT }); - await page.click('.context-menu-entry[data-index="1"]'); - } catch (err) { - console.log(`For ${request.url} we can not change sorting of reviews.`); - } + await page.click('.section-tab-info-stats-button-flex'); + await sleep(5000); + await page.waitForSelector('.context-menu-entry[data-index="1"]', { timeout: DEFAULT_TIMEOUT }); + await page.click('.context-menu-entry[data-index="1"]'); await infiniteScroll(page, 99999999999, '.section-scrollbox.section-listbox'); const reviewEls = await page.$$('div.section-review'); for (const reviewEl of reviewEls) { const moreButton = await reviewEl.$('.section-expand-review'); if (moreButton) { await moreButton.click(); - await sleep(1000); + await sleep(2000); } const review = await page.evaluate((reviewEl) => { const $review = $(reviewEl); - return { + const reviewData = { name: $review.find('.section-review-title').text().trim(), - text: $review.find('.section-review-text').text(), + text: $review.find('.section-review-review-content .section-review-text').text(), stars: $review.find('.section-review-stars').attr('aria-label').trim(), publishAt: $review.find('.section-review-publish-date').text().trim(), likesCount: $review.find('.section-review-thumbs-up-count').text().trim(), }; + const $response = $review.find('.section-review-owner-response'); + if ($response) { + reviewData.responseFromOwnerText = $response.find('.section-review-text').text().trim(); + } + return reviewData; }, reviewEl); placeDetail.reviews.push(review); }