mirror of
https://github.com/davidjohnbarton/crawler-google-places.git
synced 2025-12-12 08:28:46 +00:00
Added google check
This commit is contained in:
parent
9918348e29
commit
22bf467992
23
package-lock.json
generated
23
package-lock.json
generated
|
|
@ -27,9 +27,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ajv": {
|
"ajv": {
|
||||||
"version": "6.6.1",
|
"version": "6.6.2",
|
||||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.1.tgz",
|
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.2.tgz",
|
||||||
"integrity": "sha512-ZoJjft5B+EJBjUyu9C9Hc0OZyPZSSlOF+plzouTrg6UlA8f+e/n8NIgBFG/9tppJtpPWfthHakK7juJdNDODww==",
|
"integrity": "sha512-FBHEW6Jf5TB9MGBgUUA9XHkTbjXYfAUjY43ACMfmdMRHniyoMHjHjzD50OK8LGDWQwp4rWEsIq5kEqq7rvIM1g==",
|
||||||
"requires": {
|
"requires": {
|
||||||
"fast-deep-equal": "^2.0.1",
|
"fast-deep-equal": "^2.0.1",
|
||||||
"fast-json-stable-stringify": "^2.0.0",
|
"fast-json-stable-stringify": "^2.0.0",
|
||||||
|
|
@ -1110,6 +1110,17 @@
|
||||||
"uuid": "^3.3.2"
|
"uuid": "^3.3.2"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"request-promise": {
|
||||||
|
"version": "4.2.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.2.tgz",
|
||||||
|
"integrity": "sha1-0epG1lSm7k+O5qT+oQGMIpEZBLQ=",
|
||||||
|
"requires": {
|
||||||
|
"bluebird": "^3.5.0",
|
||||||
|
"request-promise-core": "1.1.1",
|
||||||
|
"stealthy-require": "^1.1.0",
|
||||||
|
"tough-cookie": ">=2.3.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
"request-promise-core": {
|
"request-promise-core": {
|
||||||
"version": "1.1.1",
|
"version": "1.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.1.tgz",
|
||||||
|
|
@ -1178,9 +1189,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"sshpk": {
|
"sshpk": {
|
||||||
"version": "1.15.2",
|
"version": "1.16.0",
|
||||||
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz",
|
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.0.tgz",
|
||||||
"integrity": "sha512-Ra/OXQtuh0/enyl4ETZAfTaeksa6BXks5ZcjpSUNrjBr0DvrJKX+1fsKDPpT9TBXgHAFsa4510aNVgI8g/+SzA==",
|
"integrity": "sha512-Zhev35/y7hRMcID/upReIvRse+I9SVhyVre/KTJSJQWMz3C3+G+HpO7m1wK/yckEtujKZ7dS4hkVxAnmHaIGVQ==",
|
||||||
"requires": {
|
"requires": {
|
||||||
"asn1": "~0.2.3",
|
"asn1": "~0.2.3",
|
||||||
"assert-plus": "^1.0.0",
|
"assert-plus": "^1.0.0",
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,9 @@
|
||||||
},
|
},
|
||||||
"homepage": "https://github.com/drobnikj/crawler-google-places#readme",
|
"homepage": "https://github.com/drobnikj/crawler-google-places#readme",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"apify": "^0.9.13"
|
"apify": "^0.9.13",
|
||||||
|
"request": "^2.88.0",
|
||||||
|
"request-promise": "^4.2.2"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"chai": "^4.2.0",
|
"chai": "^4.2.0",
|
||||||
|
|
|
||||||
|
|
@ -122,6 +122,7 @@ module.exports = async (page, maxHeight, elementToScroll = 'body') => {
|
||||||
page.removeAllListeners('request');
|
page.removeAllListeners('request');
|
||||||
logInfo(`Infinite scroll finished (${stringifyScrollInfo(scrollInfo)} resourcesStats=${JSON.stringify(resourcesStats)})`);
|
logInfo(`Infinite scroll finished (${stringifyScrollInfo(scrollInfo)} resourcesStats=${JSON.stringify(resourcesStats)})`);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
// Infinite scroll should not break whole crawler
|
||||||
logError('An exception thrown in infiniteScroll()', err);
|
logError('An exception thrown in infiniteScroll()', err);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
const Apify = require('apify');
|
const Apify = require('apify');
|
||||||
const placesCrawler = require('./places_crawler');
|
const placesCrawler = require('./places_crawler');
|
||||||
|
const { proxyCheck } = require('./proxy_check');
|
||||||
const { log } = Apify.utils;
|
const { log } = Apify.utils;
|
||||||
|
|
||||||
Apify.main(async () => {
|
Apify.main(async () => {
|
||||||
|
|
@ -8,6 +9,11 @@ Apify.main(async () => {
|
||||||
|
|
||||||
if (!searchString) throw new Error('Attribute searchString missing in input.');
|
if (!searchString) throw new Error('Attribute searchString missing in input.');
|
||||||
|
|
||||||
|
const proxyCheckResult = await proxyCheck(proxyConfig);
|
||||||
|
if (!proxyCheckResult.isPass) {
|
||||||
|
throw new Error(`Proxy error: ${proxyCheckResult.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
log.info('Scraping Google Places for search string:', searchString);
|
log.info('Scraping Google Places for search string:', searchString);
|
||||||
|
|
||||||
let startUrl;
|
let startUrl;
|
||||||
|
|
|
||||||
47
src/proxy_check.js
Normal file
47
src/proxy_check.js
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
const Apify = require('apify');
|
||||||
|
const rp = require('request-promise');
|
||||||
|
|
||||||
|
/**
 * Checks whether the user's proxy configuration can be used to scrape Google Maps.
 *
 * The checks run in order and the first failing one determines the result:
 *  1. Apify Proxy must be enabled (`proxyConfig.useApifyProxy`).
 *  2. A request through the proxy to the Apify browser-info endpoint must not fail
 *     with a "tunneling socket could not be established" error.
 *  3. A request through the proxy to Google Maps must end with HTTP status 200.
 *
 * @param {Object} proxyConfig Proxy settings taken from the actor input.
 * @param {boolean} [proxyConfig.useApifyProxy] Whether Apify Proxy is enabled.
 * @param {string[]} [proxyConfig.apifyProxyGroups] Proxy groups passed to `Apify.getApifyProxyUrl()`.
 * @returns {Promise<{isPass: boolean, message: (string|undefined)}>} `{ isPass: true }` when all
 *     checks pass, otherwise `{ isPass: false, message }` describing the failed check.
 * @throws Re-throws any request error other than the tunneling failure handled in step 2.
 */
const proxyCheck = async (proxyConfig) => {
    // Check if user used Apify Proxy.
    // This guard runs before the proxy URL is built so that a missing or disabled proxy
    // configuration yields the friendly message instead of an exception.
    if (!proxyConfig || !proxyConfig.useApifyProxy) {
        return {
            isPass: false,
            message: 'Please use Apify proxy.',
        };
    }

    const proxy = Apify.getApifyProxyUrl({ groups: proxyConfig.apifyProxyGroups });

    // Check if user has access to selected proxy group
    try {
        await rp('https://api.apify.com/v2/browser-info/', { proxy, resolveWithFullResponse: true });
    } catch (error) {
        // Guard against rejections that carry no string message so the original error is not masked.
        const errorMessage = error && typeof error.message === 'string' ? error.message : '';
        if (errorMessage.includes('tunneling socket could not be established')) {
            return {
                isPass: false,
                message: 'Please use Apify available proxy group.',
            };
        }
        throw error;
    }

    // Check if user has access to Google Maps.
    // `simple: false` makes request-promise resolve on non-2xx responses too; with the default
    // setting such responses reject and the status code check below could never fail.
    const googleCheck = await rp('http://maps.google.com', { proxy, resolveWithFullResponse: true, simple: false });
    if (googleCheck.statusCode !== 200) {
        const groups = proxyConfig.apifyProxyGroups;
        const hasGroups = Array.isArray(groups) && groups.length > 0;
        return {
            isPass: false,
            message: hasGroups
                ? `One of proxy groups ${groups.join(',')} failed to connect to Google Maps.`
                : 'Apify proxy failed to connect to Google Maps.',
        };
    }

    return { isPass: true };
};
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
proxyCheck,
|
||||||
|
};
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user