Added google check

This commit is contained in:
drobnikj 2019-01-08 11:39:36 +01:00
parent 9918348e29
commit 22bf467992
5 changed files with 74 additions and 7 deletions

23
package-lock.json generated
View File

@ -27,9 +27,9 @@
} }
}, },
"ajv": { "ajv": {
"version": "6.6.1", "version": "6.6.2",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.1.tgz", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.2.tgz",
"integrity": "sha512-ZoJjft5B+EJBjUyu9C9Hc0OZyPZSSlOF+plzouTrg6UlA8f+e/n8NIgBFG/9tppJtpPWfthHakK7juJdNDODww==", "integrity": "sha512-FBHEW6Jf5TB9MGBgUUA9XHkTbjXYfAUjY43ACMfmdMRHniyoMHjHjzD50OK8LGDWQwp4rWEsIq5kEqq7rvIM1g==",
"requires": { "requires": {
"fast-deep-equal": "^2.0.1", "fast-deep-equal": "^2.0.1",
"fast-json-stable-stringify": "^2.0.0", "fast-json-stable-stringify": "^2.0.0",
@ -1110,6 +1110,17 @@
"uuid": "^3.3.2" "uuid": "^3.3.2"
} }
}, },
"request-promise": {
"version": "4.2.2",
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.2.tgz",
"integrity": "sha1-0epG1lSm7k+O5qT+oQGMIpEZBLQ=",
"requires": {
"bluebird": "^3.5.0",
"request-promise-core": "1.1.1",
"stealthy-require": "^1.1.0",
"tough-cookie": ">=2.3.3"
}
},
"request-promise-core": { "request-promise-core": {
"version": "1.1.1", "version": "1.1.1",
"resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.1.tgz", "resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.1.tgz",
@ -1178,9 +1189,9 @@
} }
}, },
"sshpk": { "sshpk": {
"version": "1.15.2", "version": "1.16.0",
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz", "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.0.tgz",
"integrity": "sha512-Ra/OXQtuh0/enyl4ETZAfTaeksa6BXks5ZcjpSUNrjBr0DvrJKX+1fsKDPpT9TBXgHAFsa4510aNVgI8g/+SzA==", "integrity": "sha512-Zhev35/y7hRMcID/upReIvRse+I9SVhyVre/KTJSJQWMz3C3+G+HpO7m1wK/yckEtujKZ7dS4hkVxAnmHaIGVQ==",
"requires": { "requires": {
"asn1": "~0.2.3", "asn1": "~0.2.3",
"assert-plus": "^1.0.0", "assert-plus": "^1.0.0",

View File

@ -24,7 +24,9 @@
}, },
"homepage": "https://github.com/drobnikj/crawler-google-places#readme", "homepage": "https://github.com/drobnikj/crawler-google-places#readme",
"dependencies": { "dependencies": {
"apify": "^0.9.13" "apify": "^0.9.13",
"request": "^2.88.0",
"request-promise": "^4.2.2"
}, },
"devDependencies": { "devDependencies": {
"chai": "^4.2.0", "chai": "^4.2.0",

View File

@ -122,6 +122,7 @@ module.exports = async (page, maxHeight, elementToScroll = 'body') => {
page.removeAllListeners('request'); page.removeAllListeners('request');
logInfo(`Infinite scroll finished (${stringifyScrollInfo(scrollInfo)} resourcesStats=${JSON.stringify(resourcesStats)})`); logInfo(`Infinite scroll finished (${stringifyScrollInfo(scrollInfo)} resourcesStats=${JSON.stringify(resourcesStats)})`);
} catch (err) { } catch (err) {
// Infinite scroll should not break whole crawler
logError('An exception thrown in infiniteScroll()', err); logError('An exception thrown in infiniteScroll()', err);
} }
}; };

View File

@ -1,5 +1,6 @@
const Apify = require('apify'); const Apify = require('apify');
const placesCrawler = require('./places_crawler'); const placesCrawler = require('./places_crawler');
const { proxyCheck } = require('./proxy_check');
const { log } = Apify.utils; const { log } = Apify.utils;
Apify.main(async () => { Apify.main(async () => {
@ -8,6 +9,11 @@ Apify.main(async () => {
if (!searchString) throw new Error('Attribute searchString missing in input.'); if (!searchString) throw new Error('Attribute searchString missing in input.');
const proxyCheckResult = await proxyCheck(proxyConfig);
if (!proxyCheckResult.isPass) {
throw new Error(`Proxy error: ${proxyCheckResult.message}`);
}
log.info('Scraping Google Places for search string:', searchString); log.info('Scraping Google Places for search string:', searchString);
let startUrl; let startUrl;

47
src/proxy_check.js Normal file
View File

@ -0,0 +1,47 @@
const Apify = require('apify');
const rp = require('request-promise');
/**
 * Checks whether the user's proxy configuration can be used to scrape Google Maps.
 *
 * The check runs in three steps and stops at the first one that fails:
 *  1. Apify Proxy has to be enabled in the configuration.
 *  2. A request sent through the proxy to the Apify API has to go through; a
 *     "tunneling socket could not be established" failure is reported as the user
 *     not having access to the selected proxy group.
 *  3. A request sent through the proxy to Google Maps has to return HTTP 200.
 *
 * @param {Object} proxyConfig Proxy configuration taken from the actor input.
 * @param {boolean} [proxyConfig.useApifyProxy] Whether Apify Proxy is enabled.
 * @param {string[]} [proxyConfig.apifyProxyGroups] Proxy groups to route requests through.
 * @returns {Promise<{isPass: boolean, message: (string|undefined)}>} `isPass` is true when all
 *   checks succeeded; otherwise `message` describes the failed check.
 * @throws Rethrows any request error that is not recognised as a tunneling failure.
 */
const proxyCheck = async (proxyConfig) => {
    // Check if user used Apify Proxy. This guard runs before the proxy URL is built so that
    // a missing or disabled configuration yields the message below rather than an error
    // raised while constructing the URL.
    if (!proxyConfig || !proxyConfig.useApifyProxy) {
        return {
            isPass: false,
            message: 'Please use Apify proxy.',
        };
    }

    const proxy = Apify.getApifyProxyUrl({ groups: proxyConfig.apifyProxyGroups });
    // Groups are optional in the input; normalise them so the failure message can be built safely.
    const proxyGroups = Array.isArray(proxyConfig.apifyProxyGroups) ? proxyConfig.apifyProxyGroups : [];

    // Check if user has access to selected proxy group
    try {
        await rp('https://api.apify.com/v2/browser-info/', { proxy, resolveWithFullResponse: true });
    } catch (error) {
        const isTunnelingError = Boolean(error)
            && typeof error.message === 'string'
            && error.message.includes('tunneling socket could not be established');
        if (isTunnelingError) {
            return {
                isPass: false,
                message: 'Please use Apify available proxy group.',
            };
        }
        throw error;
    }

    // Check if user has access to Google Maps.
    // `simple: false` makes request-promise resolve for non-2xx responses as well; with the
    // default setting a blocked response (e.g. 403 or 429) rejects with StatusCodeError and
    // the status comparison below would never be reached.
    const googleCheck = await rp('http://maps.google.com', { proxy, resolveWithFullResponse: true, simple: false });
    if (googleCheck.statusCode !== 200) {
        const failedTarget = proxyGroups.length > 0
            ? `One of proxy groups ${proxyGroups.join(',')}`
            : 'Apify proxy';
        return {
            isPass: false,
            message: `${failedTarget} failed to connect to Google Maps.`,
        };
    }

    return { isPass: true };
};
// Public API of this module: the proxy pre-flight check.
module.exports = { proxyCheck };