Added google check

This commit is contained in:
drobnikj 2019-01-08 11:39:36 +01:00
parent 9918348e29
commit 22bf467992
5 changed files with 74 additions and 7 deletions

23
package-lock.json generated
View File

@ -27,9 +27,9 @@
}
},
"ajv": {
"version": "6.6.1",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.1.tgz",
"integrity": "sha512-ZoJjft5B+EJBjUyu9C9Hc0OZyPZSSlOF+plzouTrg6UlA8f+e/n8NIgBFG/9tppJtpPWfthHakK7juJdNDODww==",
"version": "6.6.2",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.2.tgz",
"integrity": "sha512-FBHEW6Jf5TB9MGBgUUA9XHkTbjXYfAUjY43ACMfmdMRHniyoMHjHjzD50OK8LGDWQwp4rWEsIq5kEqq7rvIM1g==",
"requires": {
"fast-deep-equal": "^2.0.1",
"fast-json-stable-stringify": "^2.0.0",
@ -1110,6 +1110,17 @@
"uuid": "^3.3.2"
}
},
"request-promise": {
"version": "4.2.2",
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.2.tgz",
"integrity": "sha1-0epG1lSm7k+O5qT+oQGMIpEZBLQ=",
"requires": {
"bluebird": "^3.5.0",
"request-promise-core": "1.1.1",
"stealthy-require": "^1.1.0",
"tough-cookie": ">=2.3.3"
}
},
"request-promise-core": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.1.tgz",
@ -1178,9 +1189,9 @@
}
},
"sshpk": {
"version": "1.15.2",
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz",
"integrity": "sha512-Ra/OXQtuh0/enyl4ETZAfTaeksa6BXks5ZcjpSUNrjBr0DvrJKX+1fsKDPpT9TBXgHAFsa4510aNVgI8g/+SzA==",
"version": "1.16.0",
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.0.tgz",
"integrity": "sha512-Zhev35/y7hRMcID/upReIvRse+I9SVhyVre/KTJSJQWMz3C3+G+HpO7m1wK/yckEtujKZ7dS4hkVxAnmHaIGVQ==",
"requires": {
"asn1": "~0.2.3",
"assert-plus": "^1.0.0",

View File

@ -24,7 +24,9 @@
},
"homepage": "https://github.com/drobnikj/crawler-google-places#readme",
"dependencies": {
"apify": "^0.9.13"
"apify": "^0.9.13",
"request": "^2.88.0",
"request-promise": "^4.2.2"
},
"devDependencies": {
"chai": "^4.2.0",

View File

@ -122,6 +122,7 @@ module.exports = async (page, maxHeight, elementToScroll = 'body') => {
page.removeAllListeners('request');
logInfo(`Infinite scroll finished (${stringifyScrollInfo(scrollInfo)} resourcesStats=${JSON.stringify(resourcesStats)})`);
} catch (err) {
// Infinite scroll should not break whole crawler
logError('An exception thrown in infiniteScroll()', err);
}
};

View File

@ -1,5 +1,6 @@
const Apify = require('apify');
const placesCrawler = require('./places_crawler');
const { proxyCheck } = require('./proxy_check');
const { log } = Apify.utils;
Apify.main(async () => {
@ -8,6 +9,11 @@ Apify.main(async () => {
if (!searchString) throw new Error('Attribute searchString missing in input.');
const proxyCheckResult = await proxyCheck(proxyConfig);
if (!proxyCheckResult.isPass) {
throw new Error(`Proxy error: ${proxyCheckResult.message}`);
}
log.info('Scraping Google Places for search string:', searchString);
let startUrl;

47
src/proxy_check.js Normal file
View File

@ -0,0 +1,47 @@
const Apify = require('apify');
const rp = require('request-promise');
/**
 * Checks whether the user's proxy configuration can be used to reach Google Maps.
 *
 * The checks run in this order and stop at the first failure:
 *  1. Apify Proxy must be enabled (`proxyConfig.useApifyProxy`).
 *  2. A request through the proxy to the Apify browser-info endpoint must not
 *     fail with a tunneling error.
 *  3. A request through the proxy to Google Maps must answer with HTTP 200.
 *
 * @param {Object} proxyConfig - Proxy settings taken from the actor input.
 * @param {boolean} [proxyConfig.useApifyProxy] - Whether Apify Proxy is enabled.
 * @param {string[]} [proxyConfig.apifyProxyGroups] - Selected proxy groups, if any.
 * @returns {Promise<{isPass: boolean, message?: string}>} `isPass` is true when every
 *   check succeeded; otherwise `message` describes the failed check.
 * @throws Re-throws any request error that is not a proxy tunneling failure.
 */
const proxyCheck = async (proxyConfig) => {
    // Check if user used Apify Proxy.
    // This guard has to run before the proxy URL is built: building the URL may throw
    // when Apify Proxy is not configured, which would hide the friendly message below.
    if (!proxyConfig || !proxyConfig.useApifyProxy) {
        return {
            isPass: false,
            message: 'Please use Apify proxy.',
        };
    }

    const proxy = Apify.getApifyProxyUrl({ groups: proxyConfig.apifyProxyGroups });

    // Check if user has access to selected proxy group
    try {
        await rp('https://api.apify.com/v2/browser-info/', { proxy, resolveWithFullResponse: true });
    } catch (error) {
        const errorMessage = error && typeof error.message === 'string' ? error.message : '';
        if (errorMessage.includes('tunneling socket could not be established')) {
            return {
                isPass: false,
                message: 'Please use Apify available proxy group.',
            };
        }
        throw error;
    }

    // Check if user has access to Google Maps.
    // `simple: false` stops request-promise from rejecting on non-2xx responses, so a
    // blocked request is reported through the status code check instead of an exception.
    const googleCheck = await rp('http://maps.google.com', {
        proxy,
        resolveWithFullResponse: true,
        simple: false,
    });
    if (googleCheck.statusCode !== 200) {
        // Proxy groups are optional; without them there is nothing to list in the message.
        const groups = Array.isArray(proxyConfig.apifyProxyGroups) ? proxyConfig.apifyProxyGroups : [];
        const message = groups.length > 0
            ? `One of proxy groups ${groups.join(',')} failed to connect to Google Maps.`
            : 'Apify proxy failed to connect to Google Maps.';
        return {
            isPass: false,
            message,
        };
    }

    return { isPass: true };
};
module.exports = {
proxyCheck,
};