Extract every business in any geographic area from Google Maps -- no browser needed.
gmaps-extractor reverse-engineers Google Maps' internal API to collect business data at scale using raw HTTP requests. Point it at a city and a category, and it systematically covers the entire area using grid-based search with automatic deduplication.
Capable of 100K+ records/week with parallel processing and proxy support.
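The grid idea is roughly this (a hypothetical sketch for illustration; `make_grid` is not part of the library): the area's bounding box is divided into cells, each cell is searched independently, and duplicates across overlapping cells are removed.

```python
# Hypothetical sketch of grid-based area coverage -- NOT gmaps-extractor's
# actual internals. A bounding box is split into rows x cols cells; each
# cell is then searched independently and results are deduplicated.
def make_grid(north: float, south: float, east: float, west: float,
              rows: int = 4, cols: int = 4) -> list[dict]:
    lat_step = (north - south) / rows
    lng_step = (east - west) / cols
    return [
        {
            "north": north - r * lat_step,
            "south": north - (r + 1) * lat_step,
            "west": west + c * lng_step,
            "east": west + (c + 1) * lng_step,
        }
        for r in range(rows)
        for c in range(cols)
    ]

# New York's bounding box (see the sample output below) becomes 16 cells.
cells = make_grid(north=40.91, south=40.49, east=-73.70, west=-74.25)
print(len(cells))  # 16
```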
- Full area coverage -- Divides any area into a grid of searchable cells. No results missed.
- No browser required -- Pure HTTP requests using `httpx`. No Selenium, no Puppeteer.
- Async support -- `async_collect_v2()` and `stream_collect_v2()` for non-blocking I/O.
- Streaming -- Async generator yields businesses as they are found.
- Event system -- Lifecycle callbacks for monitoring collection progress.
- Parallel processing -- Configurable worker pool (up to 50 concurrent requests).
- Resumable collection -- V2 collector saves checkpoints and auto-resumes.
- Enrichment -- Fetch place details (hours, phone, website) and reviews concurrently.
- Adaptive rate limiting -- Exponential backoff with jitter (sketched after this list). Auto-adjusts to Google's limits.
- Smart deduplication -- Deduplicates by both `place_id` and `hex_id`.
- Auto cookie management -- Builds Google sessions automatically, refreshes on failure.
- Structured logging -- Uses Python's `logging` module. Silent by default, configurable.
- Lightweight core -- Only requires `httpx`. The FastAPI server is optional.
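The retry pattern behind the rate limiting feature looks roughly like this (a minimal sketch of exponential backoff with jitter, not the library's actual implementation):

```python
import random
import time

# Minimal sketch of exponential backoff with jitter -- illustrative only,
# not gmaps-extractor's actual rate-limiting code.
def backoff_retry(request_fn, max_retries: int = 5, base_delay: float = 1.0):
    for attempt in range(max_retries):
        try:
            return request_fn()
        except Exception:
            if attempt == max_retries - 1:
                raise
            # Double the delay each attempt, plus random jitter so that
            # parallel workers do not retry in lockstep.
            delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
            time.sleep(delay)
```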
Quick start:

```python
from gmaps_extractor import GMapsExtractor

with GMapsExtractor(proxy="http://user:pass@proxy-host:port") as extractor:
    result = extractor.collect_v2("New York, USA", "lawyers", enrich=True)
    print(f"Found {len(result)} businesses")
    for biz in result:
        print(f"  {biz['name']} - {biz.get('phone', 'N/A')}")
```

Installation:

```bash
# Core library (recommended)
pip install gmaps-extractor

# With FastAPI server support (for CLI or legacy workflows)
pip install gmaps-extractor[server]

# Development
pip install gmaps-extractor[dev]
```

Or install from source:

```bash
git clone https://github.com/promisingcoder/GoogleMapsCollector.git
cd GoogleMapsCollector
pip install -e ".[dev]"
```

Requirements:

- Python 3.9+
- A residential/sticky proxy (required -- Google blocks datacenter IPs)
No server process needed. Requests go directly to Google Maps via `httpx`.
```python
from gmaps_extractor import GMapsExtractor

with GMapsExtractor(proxy="http://user:pass@host:port") as extractor:
    # Basic collection
    result = extractor.collect("London, UK", "dentists")

    # V2 collector with enrichment and reviews
    result = extractor.collect_v2(
        "Paris, France",
        "restaurants",
        enrich=True,
        reviews=True,
        reviews_limit=50,
        workers=30,
    )

    # Access results
    print(result.metadata)    # {"area": "Paris, France", "category": "restaurants", ...}
    print(result.statistics)  # {"total_collected": 1234, ...}
    for biz in result:
        print(biz["name"], biz.get("rating"))
```

Async usage:

```python
import asyncio
from gmaps_extractor import GMapsExtractor

async def main():
    async with GMapsExtractor(proxy="http://user:pass@host:port") as extractor:
        # Collect all results at once (async)
        result = await extractor.async_collect_v2(
            "Manhattan, NY",
            "lawyers",
            enrich=True,
            reviews=True,
        )
        print(f"Found {len(result)} businesses")

asyncio.run(main())
```

Process businesses as they are found, without waiting for the full collection to finish.
```python
import asyncio
from gmaps_extractor import GMapsExtractor

async def main():
    async with GMapsExtractor(proxy="http://user:pass@host:port") as extractor:
        async for biz in extractor.stream_collect_v2("NYC", "coffee shops"):
            print(f"Found: {biz['name']} at {biz.get('address', 'N/A')}")

asyncio.run(main())
```

Break large areas into named sub-areas (boroughs, districts, neighborhoods) for better coverage.
```python
with GMapsExtractor(proxy="http://user:pass@host:port") as extractor:
    result = extractor.collect_v2(
        "London, UK",
        "dentists",
        subdivide=True,
        enrich=True,
    )
```

Monitor collection progress with lifecycle callbacks.
```python
from gmaps_extractor import GMapsExtractor, EventType, EventEmitter

emitter = EventEmitter()

def on_cell_complete(event):
    print(f"Cell done: +{event.data.get('businesses_found', 0)} businesses")

def on_complete(event):
    total = event.data.get("total_businesses", 0)
    print(f"Collection complete: {total} businesses")

emitter.on(EventType.CELL_COMPLETE, on_cell_complete)
emitter.on(EventType.COLLECTION_COMPLETE, on_complete)

with GMapsExtractor(proxy="http://user:pass@host:port", events=emitter) as extractor:
    result = extractor.collect_v2("NYC", "lawyers")
```

Or use the convenience shortcuts:
```python
with GMapsExtractor(
    proxy="http://user:pass@host:port",
    on_business_found=lambda e: print(f"Found: {e.data}"),
    on_collection_complete=lambda e: print(f"Done: {e.data}"),
) as extractor:
    result = extractor.collect_v2("NYC", "lawyers")
```

The library uses Python's `logging` module with a `NullHandler` by default, so it produces no log output on its own. Keep `verbose=True` (the default) for progress output, or configure the logger manually.
```python
import logging

# Option 1: use verbose=True (the default)
with GMapsExtractor(proxy="...", verbose=True) as extractor:
    result = extractor.collect("NYC", "lawyers")  # Progress printed to stdout

# Option 2: configure logging manually
logging.getLogger("gmaps_extractor").setLevel(logging.DEBUG)
logging.getLogger("gmaps_extractor").addHandler(logging.StreamHandler())

with GMapsExtractor(proxy="...", verbose=False) as extractor:
    result = extractor.collect("NYC", "lawyers")  # DEBUG-level output
```

Use `GMapsClient` or `AsyncGMapsClient` directly for custom workflows.
```python
from gmaps_extractor.client import GMapsClient
from gmaps_extractor.settings import GMapsSettings

settings = GMapsSettings(proxy_url="http://user:pass@host:port")
client = GMapsClient(settings)

# Search
businesses = client.search("lawyers", lat=40.7128, lng=-74.0060)

# Place details
details = client.place_details(hex_id="0x89c259a...:0x25d41...", name="Acme Law")

# Reviews
reviews = client.reviews(hex_id="0x89c259a...:0x25d41...", limit=20)
```

`GMapsExtractor` constructor parameters:

| Parameter | Type | Default | Description |
|---|---|---|---|
| `proxy` | `str` | `None` | Proxy URL. Falls back to `GMAPS_PROXY_*` env vars. |
| `cookies` | `dict` | `None` | Explicit cookie override. Auto-managed if `None`. |
| `workers` | `int` | `20` | Parallel search workers. |
| `use_server` | `bool` | `False` | Use legacy FastAPI server (requires the `[server]` extra). |
| `verbose` | `bool` | `True` | Enable progress output via logging. |
| `events` | `EventEmitter` | auto | Event emitter for lifecycle hooks. |
| `progress` | `bool`/`ProgressReporter` | auto | Progress reporter (attached when `verbose=True`). |
| `on_business_found` | `callable` | `None` | Shortcut callback for `BUSINESS_FOUND` events. |
| `on_collection_complete` | `callable` | `None` | Shortcut callback for `COLLECTION_COMPLETE` events. |
| `server_port` | `int` | `8000` | Port for legacy server mode. |
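Putting a few of these together (a quick sketch combining parameters documented in the table above):

```python
from gmaps_extractor import GMapsExtractor

# Combining several constructor parameters from the table above.
with GMapsExtractor(
    proxy="http://user:pass@host:port",
    workers=30,     # more parallel search workers (default 20)
    verbose=False,  # suppress progress output
    on_collection_complete=lambda e: print(f"Done: {e.data}"),
) as extractor:
    result = extractor.collect_v2("Chicago, USA", "plumbers")
```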
Environment variables:

```bash
export GMAPS_PROXY_HOST="proxy-host:port"
export GMAPS_PROXY_USER="username"
export GMAPS_PROXY_PASS="password"
export GMAPS_COOKIES='{"NID":"...","SOCS":"..."}'
```

Configuration precedence:

- Constructor arguments (highest priority)
- Environment variables
- `config.py`/`_config_defaults.py` defaults (lowest priority)
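For example, with the variables above exported, the proxy argument can be omitted and the env vars are picked up (a sketch based on the fallback behavior described in the parameters table):

```python
from gmaps_extractor import GMapsExtractor

# Assumes GMAPS_PROXY_HOST / GMAPS_PROXY_USER / GMAPS_PROXY_PASS are exported;
# no proxy argument is passed, so the env vars are used as the fallback.
with GMapsExtractor() as extractor:
    result = extractor.collect("Boston, USA", "electricians")
```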
Error handling:

```python
from gmaps_extractor import GMapsExtractor
from gmaps_extractor.exceptions import (
    GMapsExtractorError,
    BoundaryError,
    ConfigurationError,
    RateLimitError,
    AuthenticationError,
    ServerError,
)

try:
    with GMapsExtractor(proxy="http://user:pass@host:port") as extractor:
        result = extractor.collect_v2("New York, USA", "lawyers")
except BoundaryError:
    print("Could not resolve area boundaries via Nominatim")
except RateLimitError:
    print("Rate limit exceeded after all retries")
except AuthenticationError:
    print("Proxy or cookie authentication failed")
except GMapsExtractorError as e:
    print(f"Extraction failed: {e}")
```

After installing, these commands are available:
```bash
# V2 collector (recommended)
gmaps-collect-v2 "Manhattan, New York" "lawyers" --enrich --reviews -l 100

# V1 collector
gmaps-collect "New York, USA" "lawyers" --subdivide

# Add reviews to existing collection
gmaps-enrich-reviews output/lawyers_in_manhattan.json -l 50

# Start FastAPI server (only needed for CLI usage)
gmaps-server
```

Note: CLI commands require the FastAPI server to be running (`gmaps-server`). The library API does not.
JSON and CSV files are generated in the `output/` directory.
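The JSON files are easy to post-process; a hypothetical snippet (file name taken from the CLI example above, keys from the sample structure below):

```python
import json

# Load a collection file written to output/ and filter by rating.
with open("output/lawyers_in_manhattan.json") as f:
    data = json.load(f)

top_rated = [b for b in data["businesses"] if b.get("rating", 0) >= 4.5]
print(f"{len(top_rated)} of {data['statistics']['total_collected']} rated 4.5+")
```

A typical JSON file looks like this: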
```json
{
  "metadata": {
    "area": "New York, USA",
    "category": "lawyers",
    "boundary": {"name": "New York", "north": 40.91, "south": 40.49, "east": -73.70, "west": -74.25},
    "search_mode": "grid",
    "enrichment": {"details_fetched": true, "reviews_fetched": true, "reviews_limit": 20}
  },
  "statistics": {
    "total_collected": 1234,
    "duplicates_removed": 89,
    "search_time_seconds": 120.5,
    "total_time_seconds": 340.2
  },
  "businesses": [
    {
      "name": "Smith & Associates",
      "address": "123 Broadway, New York, NY 10006",
      "place_id": "ChIJ...",
      "rating": 4.5,
      "review_count": 123,
      "latitude": 40.7128,
      "longitude": -74.0060,
      "phone": "+1 212-555-0123",
      "website": "https://example.com",
      "category": "Lawyer",
      "hours": {"monday": "9:00 AM - 5:00 PM"},
      "reviews_data": [{"author": "John", "rating": 5, "text": "Excellent!", "date": "2 months ago"}]
    }
  ]
}
```

Project layout:

```
gmaps_extractor/
├── extractor.py      # GMapsExtractor (high-level API) + CollectionResult
├── client.py         # GMapsClient (sync HTTP, default path)
├── async_client.py   # AsyncGMapsClient (async HTTP)
├── settings.py       # GMapsSettings dataclass
├── events.py         # EventEmitter + EventType
├── progress.py       # ProgressReporter
├── exceptions.py     # Exception hierarchy
├── parsers/          # Response parsers (business, place, reviews)
├── geo/              # Grid generation, Nominatim boundary resolution
├── extraction/       # Collection orchestrators (sync, async, streaming)
├── decoder/          # Protobuf parameter decoder
└── server.py         # Optional FastAPI server
```
See CLAUDE.md for architecture details, common tasks, and development commands.
```bash
git clone https://github.com/promisingcoder/GoogleMapsCollector.git
cd GoogleMapsCollector
pip install -e ".[dev]"
pytest
```

MIT License -- see LICENSE for details.