Coverage for backend / app / geolocation / geolocation.py: 99%
76 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-17 21:34 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-17 21:34 +0000
1"""Geolocation handling using OpenStreetMap Nominatim API with caching."""
3import html
4import logging
5import threading
6import time
7import traceback
9import requests
10from sqlalchemy.orm import Session
12from app.config import settings
13from app.models import Geolocation
14from app.resources import COUNTRIES
# Monotonic timestamp (time.monotonic()) recorded by call_geocoding_api after
# its most recent Nominatim request; the next call sleeps until at least one
# second has elapsed since this value.
_last_call_time = 0.0
# Held by call_geocoding_api while it waits out the interval and sends the
# request, so concurrent threads issue their requests one at a time.
_api_lock = threading.Lock()
def call_geocoding_api(query: str) -> tuple[float, float, dict]:
    """Geocode a free-text query using the OpenStreetMap Nominatim search API.

    Requests are serialised through the module-level ``_api_lock`` and spaced
    at least one second apart, measured from the previous request attempt
    (successful or not).

    :param query: A free-text location query string, sent as the ``q`` parameter.
    :return: A tuple of (latitude, longitude, address) where ``address`` is the
        ``address`` dict of the first result (empty dict if the key is absent).
    :raises RuntimeError: If the HTTP request fails, returns an error status, the
        body is not valid JSON, or the first result is malformed.
    :raises ValueError: If the API returns no results for the query.
    """
    global _last_call_time

    print("Calling Nominatim API for query:", query)
    base_url = "https://nominatim.openstreetmap.org/search"
    params = {"format": "json", "limit": 1, "addressdetails": 1, "q": query}
    headers = {"User-Agent": f"JAM/{settings.app_version} ({settings.main_email_username})"}

    try:
        with _api_lock:
            elapsed = time.monotonic() - _last_call_time
            if elapsed < 1.0:
                time.sleep(1.0 - elapsed)
            try:
                response = requests.get(base_url, params=params, headers=headers, timeout=5)
            finally:
                # A failed or timed-out attempt still counts as a request, so
                # record it; otherwise the next call would skip the wait.
                _last_call_time = time.monotonic()
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        raise RuntimeError(f"Nominatim API error: {str(e)}") from e

    if not data:
        raise ValueError(f"No results found for: {query}")

    # A malformed payload is an API error, not "no results": raising ValueError
    # here would make callers cache the query as permanently unresolvable.
    try:
        result = data[0]
        return float(result["lat"]), float(result["lon"]), result.get("address", {})
    except (AttributeError, KeyError, TypeError, ValueError) as e:
        raise RuntimeError(f"Nominatim API error: malformed result ({e!r})") from e
def _sanitise_query(query: str | dict) -> str:
    """Decode HTML entities, strip whitespace and flatten a query into one string."""
    if isinstance(query, dict):
        # Non-string values (e.g. a numeric postcode) are stringified so the
        # join cannot fail; parts that end up empty are dropped.
        parts = (html.unescape(str(value)).strip() for value in query.values() if value)
        return ", ".join(part for part in parts if part)
    return html.unescape(query).strip()


def _match_country(osm_country: str | None) -> str | None:
    """Return the COUNTRIES name equal (case-insensitively) to *osm_country*, if any."""
    if not osm_country:
        return None
    wanted = osm_country.lower()
    return next((c["name"] for c in COUNTRIES if c["name"].lower() == wanted), None)


def _persist_geolocation(db: Session, geo: Geolocation) -> Geolocation:
    """Add, commit and refresh *geo*, rolling the session back if the commit fails."""
    db.add(geo)
    try:
        db.commit()
    except Exception:
        # Leave the session usable for the caller after a failed commit.
        db.rollback()
        raise
    db.refresh(geo)
    return geo


def geocode_location(
    query: str | dict,
    db: Session,
    logger: logging.Logger | None = None,
) -> Geolocation | None:
    """Geocode a location query, using cached Geolocation rows when available.

    The query is sanitised and looked up in the ``Geolocation`` table first.
    On a miss the Nominatim API is called and the outcome is stored: a full
    record when a result is found, or a record holding only the query when
    the API reports no results, so the same query is not sent again.

    :param query: A location query string, or a dict of query parts (e.g.
        postcode, city, country) whose non-empty values are joined with ", ".
    :param db: SQLAlchemy session for database operations.
    :param logger: Optional logger; messages are printed when it is None.
    :return: The cached or newly stored Geolocation record, or None if the
        API call or the database write failed.
    """
    log_info = logger.info if logger is not None else print
    log_warning = logger.warning if logger is not None else print

    sanitised_query = _sanitise_query(query)

    # Check cache first
    cached = db.query(Geolocation).filter_by(query=sanitised_query).first()
    if cached:
        return cached

    try:
        try:
            lat, lon, address_dict = call_geocoding_api(sanitised_query)
        except ValueError:
            # If no result was found, store the query string to avoid repeated calls to the API
            new_geo = _persist_geolocation(db, Geolocation(query=sanitised_query))
            log_warning(f"Failed to geocode '{sanitised_query}'. Stored in '{new_geo.id}'")
            return new_geo

        # Create new geolocation entry
        new_geo = _persist_geolocation(
            db,
            Geolocation(
                query=sanitised_query,
                latitude=lat,
                longitude=lon,
                data=address_dict,
                postcode=address_dict.get("postcode"),
                city=address_dict.get("town") or address_dict.get("city"),
                country=_match_country(address_dict.get("country")),
            ),
        )
        log_info(f"Successfully geocode '{sanitised_query}' to '{new_geo.id}'")
        return new_geo

    except Exception as e:
        # Covers API errors and database failures on either storage path.
        print(f"Warning: Could not geocode '{sanitised_query}': {e}")
        print(traceback.format_exc())
        log_warning(f"Failed to geocode '{sanitised_query}' due to error {e}")
        return None