From 22bf46799238b858db04a7532998d2c71917eeab Mon Sep 17 00:00:00 2001 From: drobnikj Date: Tue, 8 Jan 2019 11:39:36 +0100 Subject: [PATCH] Added google check --- package-lock.json | 23 +++++++++++++++------ package.json | 4 +++- src/infinite_scroll.js | 1 + src/main.js | 6 ++++++ src/proxy_check.js | 47 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 src/proxy_check.js diff --git a/package-lock.json b/package-lock.json index c459c1b..bcd6406 100644 --- a/package-lock.json +++ b/package-lock.json @@ -27,9 +27,9 @@ } }, "ajv": { - "version": "6.6.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.1.tgz", - "integrity": "sha512-ZoJjft5B+EJBjUyu9C9Hc0OZyPZSSlOF+plzouTrg6UlA8f+e/n8NIgBFG/9tppJtpPWfthHakK7juJdNDODww==", + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.2.tgz", + "integrity": "sha512-FBHEW6Jf5TB9MGBgUUA9XHkTbjXYfAUjY43ACMfmdMRHniyoMHjHjzD50OK8LGDWQwp4rWEsIq5kEqq7rvIM1g==", "requires": { "fast-deep-equal": "^2.0.1", "fast-json-stable-stringify": "^2.0.0", @@ -1110,6 +1110,17 @@ "uuid": "^3.3.2" } }, + "request-promise": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.2.tgz", + "integrity": "sha1-0epG1lSm7k+O5qT+oQGMIpEZBLQ=", + "requires": { + "bluebird": "^3.5.0", + "request-promise-core": "1.1.1", + "stealthy-require": "^1.1.0", + "tough-cookie": ">=2.3.3" + } + }, "request-promise-core": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.1.tgz", @@ -1178,9 +1189,9 @@ } }, "sshpk": { - "version": "1.15.2", - "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz", - "integrity": "sha512-Ra/OXQtuh0/enyl4ETZAfTaeksa6BXks5ZcjpSUNrjBr0DvrJKX+1fsKDPpT9TBXgHAFsa4510aNVgI8g/+SzA==", + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.0.tgz", + "integrity": 
"sha512-Zhev35/y7hRMcID/upReIvRse+I9SVhyVre/KTJSJQWMz3C3+G+HpO7m1wK/yckEtujKZ7dS4hkVxAnmHaIGVQ==", "requires": { "asn1": "~0.2.3", "assert-plus": "^1.0.0", diff --git a/package.json b/package.json index dcf0bdb..9b5af13 100644 --- a/package.json +++ b/package.json @@ -24,7 +24,9 @@ }, "homepage": "https://github.com/drobnikj/crawler-google-places#readme", "dependencies": { - "apify": "^0.9.13" + "apify": "^0.9.13", + "request": "^2.88.0", + "request-promise": "^4.2.2" }, "devDependencies": { "chai": "^4.2.0", diff --git a/src/infinite_scroll.js b/src/infinite_scroll.js index 315c52b..e531501 100644 --- a/src/infinite_scroll.js +++ b/src/infinite_scroll.js @@ -122,6 +122,7 @@ module.exports = async (page, maxHeight, elementToScroll = 'body') => { page.removeAllListeners('request'); logInfo(`Infinite scroll finished (${stringifyScrollInfo(scrollInfo)} resourcesStats=${JSON.stringify(resourcesStats)})`); } catch (err) { + // Infinite scroll should not break whole crawler logError('An exception thrown in infiniteScroll()', err); } }; diff --git a/src/main.js b/src/main.js index 3263fdc..4295c53 100644 --- a/src/main.js +++ b/src/main.js @@ -1,5 +1,6 @@ const Apify = require('apify'); const placesCrawler = require('./places_crawler'); +const { proxyCheck } = require('./proxy_check'); const { log } = Apify.utils; Apify.main(async () => { @@ -8,6 +9,11 @@ Apify.main(async () => { if (!searchString) throw new Error('Attribute searchString missing in input.'); + const proxyCheckResult = await proxyCheck(proxyConfig); + if (!proxyCheckResult.isPass) { + throw new Error(`Proxy error: ${proxyCheckResult.message}`); + } + log.info('Scraping Google Places for search string:', searchString); let startUrl; diff --git a/src/proxy_check.js b/src/proxy_check.js new file mode 100644 index 0000000..09a4d65 --- /dev/null +++ b/src/proxy_check.js @@ -0,0 +1,47 @@ +const Apify = require('apify'); +const rp = require('request-promise'); + +/** + * Check if user some of user proxies work 
for Google Maps.
+ *
+ * Runs three checks in order and stops at the first one that fails:
+ *  1. the input must enable Apify Proxy (`useApifyProxy`),
+ *  2. a request to the Apify browser-info endpoint must get through the proxy,
+ *  3. a request to Google Maps through the proxy must answer with HTTP 200.
+ *
+ * @param {Object} proxyConfig Proxy settings taken from the actor input.
+ * @param {boolean} [proxyConfig.useApifyProxy] Must be truthy for the check to pass.
+ * @param {string[]} [proxyConfig.apifyProxyGroups] Proxy groups passed to `Apify.getApifyProxyUrl`.
+ * @returns {Promise<{isPass: boolean, message: (string|undefined)}>} `isPass` is true when every
+ *     check succeeded; otherwise `message` explains which check failed.
+ * @throws Rethrows any request error other than the proxy tunneling failure handled below.
+ */
+const proxyCheck = async (proxyConfig) => {
+    // Validate the input before touching the proxy URL, so a missing or non-Apify
+    // configuration yields the friendly result instead of an exception.
+    if (!proxyConfig || !proxyConfig.useApifyProxy) {
+        return {
+            isPass: false,
+            message: 'Please use Apify proxy.',
+        };
+    }
+
+    const { apifyProxyGroups } = proxyConfig;
+    const proxy = Apify.getApifyProxyUrl({ groups: apifyProxyGroups });
+
+    // Check if user has access to selected proxy group
+    try {
+        await rp('https://api.apify.com/v2/browser-info/', { proxy, resolveWithFullResponse: true });
+    } catch (error) {
+        const errorMessage = String(error && error.message);
+        if (errorMessage.includes('tunneling socket could not be established')) {
+            return {
+                isPass: false,
+                message: 'Please use Apify available proxy group.',
+            };
+        }
+        throw error;
+    }
+
+    // Check if user has access to Google Maps.
+    // `simple: false` stops request-promise from rejecting on non-2xx responses,
+    // otherwise the status code comparison below could never see a 4xx/5xx answer.
+    const googleCheck = await rp('http://maps.google.com', {
+        proxy,
+        resolveWithFullResponse: true,
+        simple: false,
+    });
+    if (googleCheck.statusCode !== 200) {
+        // The groups are optional, so do not assume there is an array to join.
+        const hasGroups = Array.isArray(apifyProxyGroups) && apifyProxyGroups.length > 0;
+        const failedProxy = hasGroups
+            ? `One of proxy groups ${apifyProxyGroups.join(',')}`
+            : 'Apify Proxy';
+        return {
+            isPass: false,
+            message: `${failedProxy} failed to connect to Google Maps.`,
+        };
+    }
+
+    return { isPass: true };
+};
+
+module.exports = {
+    proxyCheck,
+};
+