From ddf08817be7f070bdcea80a61f72c06f8d0491a4 Mon Sep 17 00:00:00 2001 From: drobnikj Date: Thu, 10 Jan 2019 14:37:45 +0100 Subject: [PATCH] Updated comments and readme --- README.md | 112 +++++++++++++++++++++++++++++++++- src/consts.js | 1 - src/enqueue_places_crawler.js | 8 +-- src/places_crawler.js | 12 ++-- src/proxy_check.js | 5 +- 5 files changed, 122 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 130fd0e..d6fbc97 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,10 @@ Get data from Google Places, which official [Google Maps Places API](https://dev ## Why? You can use official [Google Maps Places API](https://developers.google.com/places/web-service/search), it is better way for the most use cases. -But API doesn't provide: -- Popular place times histogram +Unlike the official Google Maps Places API, you can get the following from the crawler: + +- Popular place times histogram (there is no data for that in the official API) - Place reviews (you can get up to 5 reviews from official API) - Place photos (you can can up to 10 photos from official API) @@ -32,3 +33,110 @@ On this input actor searches places on this start url: https://www.google.com/ma ## OUTPUT Once the actor finishes, it outputs results to actor default dataset.
+ +Example results item: + +```text +{ + "title": "Scotiabank", + "totalScore": 3.7, + "categoryName": "Bank", + "address": "201 Bishopsgate, London EC2M 3NS, UK", + "plusCode": "GWCC+75 City of London, London, UK", + "popularTimesHistogram": { + "Su": [], + "Mo": [ + { + "hour": 6, + "occupancyPercent": 0 + }, + { + "hour": 7, + "occupancyPercent": 0 + }, + { + "hour": 8, + "occupancyPercent": 0 + }, + { + "hour": 9, + "occupancyPercent": 75 + }, + { + "hour": 10, + "occupancyPercent": 73 + }, + { + "hour": 11, + "occupancyPercent": 60 + }, + { + "hour": 12, + "occupancyPercent": 57 + }, + { + "hour": 13, + "occupancyPercent": 56 + }, + { + "hour": 14, + "occupancyPercent": 56 + }, + { + "hour": 15, + "occupancyPercent": 57 + }, + { + "hour": 16, + "occupancyPercent": 50 + }, + { + "hour": 17, + "occupancyPercent": 33 + }, + { + "hour": 18, + "occupancyPercent": 14 + }, + { + "hour": 19, + "occupancyPercent": 4 + }, + { + "hour": 20, + "occupancyPercent": 1 + }, + { + "hour": 21, + "occupancyPercent": 0 + }, + { + "hour": 22, + "occupancyPercent": 0 + }, + { + "hour": 23, + "occupancyPercent": 0 + } + ], + ... + }, + "reviews": [ + { + "name": "NELLORE BALA NAVEEN REDDY", + "text": "nice bank in london", + "stars": "5 stars", + "publishAt": "2 months ago", + "likesCount": "", + "responseFromOwnerText": "" + }, + ... + ], + "reviewsCount": 6, + "imageUrls": [ + "https://lh5.googleusercontent.com/p/AF1QipPvm-rzo7_mlLRmctQwDJV6agVGHZMUJYLinU_t=s508-k-no", + ... 
+ ], + "url": "https://www.google.com/maps/place/Scotiabank/@51.5258542,-0.335595,11z/data=!4m8!1m2!2m1!1sbanks+london!3m4!1s0x48761cb181573665:0x5fce6a25f2e99723!8m2!3d51.5206306!4d-0.0795672" +} +``` diff --git a/src/consts.js b/src/consts.js index 08bbdf6..3f7ec94 100644 --- a/src/consts.js +++ b/src/consts.js @@ -3,4 +3,3 @@ exports.DEFAULT_TIMEOUT = 60 * 1000; // 60 sec exports.LISTING_PAGINATION_KEY = 'listingState'; exports.MAX_PAGE_RETRIES = 5; - diff --git a/src/enqueue_places_crawler.js b/src/enqueue_places_crawler.js index 3f846d0..cea7929 100644 --- a/src/enqueue_places_crawler.js +++ b/src/enqueue_places_crawler.js @@ -36,7 +36,7 @@ const enqueueAllUrlsFromPagination = async (page, requestQueue, paginationFrom, }; /** - * Adds all places from listing to queue + * Adds places from the listing to the queue * @param page * @param searchString * @param requestQueue @@ -55,10 +55,10 @@ const enqueueAllPlaceDetails = async (page, searchString, requestQueue, maxPlace try { await page.waitForSelector('h1.section-hero-header-title'); } catch (e) { - // It can happen, if there are listing, not just detail page + // It can happen if there is a list of details.
} - // In case there is no listing, put just detail page to queue + // In case there is no list of details, it enqueues just the detail page const maybeDetailPlace = await page.$('h1.section-hero-header-title'); if (maybeDetailPlace) { const url = page.url(); @@ -66,7 +66,7 @@ const enqueueAllPlaceDetails = async (page, searchString, requestQueue, maxPlace return; } - // In case there is listing, go through all details, limits with maxPlacesPerCrawl + // In case there is a list of details, it goes through the details, limited by maxPlacesPerCrawl const nextButtonSelector = '[jsaction="pane.paginationSection.nextPage"]'; let isFinished; while (true) { diff --git a/src/places_crawler.js b/src/places_crawler.js index a305dde..b76c537 100644 --- a/src/places_crawler.js +++ b/src/places_crawler.js @@ -11,7 +11,7 @@ const { enqueueAllPlaceDetails } = require('./enqueue_places_crawler'); * @param page */ const extractPlaceDetail = async (page) => { - // Extracts basic information + // Extract basic information const titleSel = 'h1.section-hero-header-title'; await page.waitForSelector(titleSel, { timeout: DEFAULT_TIMEOUT }); const detail = await page.evaluate(() => { }; }); - // Extracty histogram for popular times + // Extract histogram for popular times const histogramSel = '.section-popular-times'; if (await page.$(histogramSel)) { detail.popularTimesHistogram = await page.evaluate(() => { }); } - // Extracts reviews + // Extract reviews detail.reviews = []; const reviewsButtonSel = 'button[jsaction="pane.reviewChart.moreReviews"]'; if (detail.totalScore) { await page.click('button.section-header-back-button'); } - // Extracts place images + // Extract place images await page.waitForSelector(titleSel, { timeout: DEFAULT_TIMEOUT }); const imagesButtonSel =
'.section-image-pack-image-container'; const imagesButton = await page.$(imagesButtonSel); @@ -153,8 +153,8 @@ const setUpCrawler = (launchPuppeteerOptions, requestQueue, maxCrawledPlaces) => requestQueue, maxRequestRetries: MAX_PAGE_RETRIES, retireInstanceAfterRequestCount: 10, - handlePageTimeoutSecs: 15 * 60, // 15 min because startUrl enqueueing - maxOpenPagesPerInstance: 1, // Because startUrl enqueueing crashes if we mixed tabs with details scraping + handlePageTimeoutSecs: 15 * 60, // long timeout because of startUrl enqueueing + maxOpenPagesPerInstance: 1, // Because startUrl enqueueing crashes if we mix tabs with other scraping }; if (maxCrawledPlaces) { crawlerOpts.maxRequestsPerCrawl = maxCrawledPlaces + 1; // The first one is startUrl diff --git a/src/proxy_check.js b/src/proxy_check.js index 09a4d65..a02f678 100644 --- a/src/proxy_check.js +++ b/src/proxy_check.js @@ -2,13 +2,13 @@ const Apify = require('apify'); const rp = require('request-promise'); /** - * Check if user some of user proxies work for Google Maps + * Check if some of the user's proxies work for Google Maps * @param proxyConfig */ const proxyCheck = async (proxyConfig) => { const proxy = Apify.getApifyProxyUrl({ groups: proxyConfig.apifyProxyGroups }); - // Check if user used Apify Proxy + // Check if user uses Apify Proxy if (!proxyConfig.useApifyProxy) { return { isPass: false, @@ -44,4 +44,3 @@ const proxyCheck = async (proxyConfig) => { module.exports = { proxyCheck, }; -