mirror of
https://github.com/davidjohnbarton/crawler-google-places.git
synced 2025-12-12 16:38:45 +00:00
Improved proxy configuration
This commit is contained in:
parent
42a0735093
commit
cd4e552685
|
|
@ -4,13 +4,6 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"schemaVersion": 1,
|
"schemaVersion": 1,
|
||||||
"properties": {
|
"properties": {
|
||||||
"proxyGoups": {
|
|
||||||
"title": "Proxy configuration",
|
|
||||||
"type": "object",
|
|
||||||
"description": "Optionaly use Apify Proxy",
|
|
||||||
"prefill": { "useApifyProxy": true },
|
|
||||||
"editor": "proxy"
|
|
||||||
},
|
|
||||||
"searchString": {
|
"searchString": {
|
||||||
"title": "Search",
|
"title": "Search",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
|
@ -18,6 +11,13 @@
|
||||||
"prefill": "New York Gym",
|
"prefill": "New York Gym",
|
||||||
"editor": "textfield"
|
"editor": "textfield"
|
||||||
},
|
},
|
||||||
|
"proxyConfig": {
|
||||||
|
"title": "Proxy configuration",
|
||||||
|
"type": "object",
|
||||||
|
"description": "Use Apify Proxy, you need to have some proxy group to results on Google.",
|
||||||
|
"prefill": { "useApifyProxy": true },
|
||||||
|
"editor": "proxy"
|
||||||
|
},
|
||||||
"lat": {
|
"lat": {
|
||||||
"title": "Viewport Latitude",
|
"title": "Viewport Latitude",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ Example input:
|
||||||
With this input, the actor searches for places starting from this URL: https://www.google.com/maps/search/%C4%8Dsob/@50.0860729,14.4135326,10z
|
With this input, the actor searches for places starting from this URL: https://www.google.com/maps/search/%C4%8Dsob/@50.0860729,14.4135326,10z
|
||||||
|
|
||||||
- `searchString` - String that will be searched for on Google Maps
|
- `searchString` - String that will be searched for on Google Maps
|
||||||
|
- `proxyConfig` - Apify proxy configuration
|
||||||
- `lat` - Viewport latitude
|
- `lat` - Viewport latitude
|
||||||
- `lng` - Viewport longitude
|
- `lng` - Viewport longitude
|
||||||
- `zoom` - Viewport zoom, e.g. zoom: 10 -> https://www.google.com/maps/@50.0860729,14.4135326,10z vs zoom: 1 -> https://www.google.com/maps/@50.0860729,14.4135326,1z
|
- `zoom` - Viewport zoom, e.g. zoom: 10 -> https://www.google.com/maps/@50.0860729,14.4135326,10z vs zoom: 1 -> https://www.google.com/maps/@50.0860729,14.4135326,1z
|
||||||
|
|
|
||||||
17
src/main.js
17
src/main.js
|
|
@ -31,7 +31,7 @@ const enqueueAllUrlsFromPagination = async (page, requestQueue) => {
|
||||||
|
|
||||||
Apify.main(async () => {
|
Apify.main(async () => {
|
||||||
const input = await Apify.getValue('INPUT');
|
const input = await Apify.getValue('INPUT');
|
||||||
const { searchString, lat, lng } = input;
|
const { searchString, proxyConfig, lat, lng } = input;
|
||||||
|
|
||||||
if (!searchString) throw new Error('Attribute searchString missing in input.');
|
if (!searchString) throw new Error('Attribute searchString missing in input.');
|
||||||
|
|
||||||
|
|
@ -55,13 +55,16 @@ Apify.main(async () => {
|
||||||
// NOTE: Ensured - If pageFunction failed crawler skipped already scraped pagination
|
// NOTE: Ensured - If pageFunction failed crawler skipped already scraped pagination
|
||||||
let listingPagination = await Apify.getValue(LISTING_PAGINATION_KEY) || {};
|
let listingPagination = await Apify.getValue(LISTING_PAGINATION_KEY) || {};
|
||||||
|
|
||||||
|
const launchPuppeteerOptions = {
|
||||||
|
useApifyProxy: true,
|
||||||
|
// useChrome: true,
|
||||||
|
apifyProxyGroups: ['CZECH_LUMINATI'],
|
||||||
|
// liveView: Apify.isAtHome(),
|
||||||
|
};
|
||||||
|
if (proxyConfig) Object.assign(launchPuppeteerOptions, proxyConfig);
|
||||||
|
|
||||||
const crawler = new Apify.PuppeteerCrawler({
|
const crawler = new Apify.PuppeteerCrawler({
|
||||||
launchPuppeteerOptions: {
|
launchPuppeteerOptions,
|
||||||
useApifyProxy: true,
|
|
||||||
// useChrome: true,
|
|
||||||
apifyProxyGroups: ['CZECH_LUMINATI'],
|
|
||||||
// liveView: Apify.isAtHome(),
|
|
||||||
},
|
|
||||||
requestQueue,
|
requestQueue,
|
||||||
handlePageTimeoutSecs: 1800, // We are adding all links to queue on startUrl
|
handlePageTimeoutSecs: 1800, // We are adding all links to queue on startUrl
|
||||||
handlePageFunction: async ({ request, page }) => {
|
handlePageFunction: async ({ request, page }) => {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user