Coverage for backend / app / geolocation / geolocation.py: 99%

76 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-17 21:34 +0000

1"""Geolocation handling using OpenStreetMap Nominatim API with caching.""" 

2 

3import html 

4import logging 

5import threading 

6import time 

7import traceback 

8 

9import requests 

10from sqlalchemy.orm import Session 

11 

12from app.config import settings 

13from app.models import Geolocation 

14from app.resources import COUNTRIES 

15 

16_last_call_time = 0.0 

17_api_lock = threading.Lock() 

18 

19 

def call_geocoding_api(query: str) -> tuple[float, float, dict]:
    """Geocode a free-text query using the OpenStreetMap Nominatim search API.

    Requests are serialised through the module-level ``_api_lock`` and spaced
    at least one second apart, measured from the previous attempt (successful
    or not).

    :param query: A free-text location query string.
    :return: A tuple of (latitude, longitude, address), where ``address`` is the
        ``"address"`` dict of the first result (an empty dict if the key is absent).
    :raises RuntimeError: If the HTTP request fails, returns an error status,
        or the response body cannot be decoded as JSON.
    :raises ValueError: If the API returns no results for the query.
    """
    global _last_call_time

    print("Calling Nominatim API for query:", query)
    base_url = "https://nominatim.openstreetmap.org/search"
    params = {"format": "json", "limit": 1, "addressdetails": 1, "q": query}
    headers = {"User-Agent": f"JAM/{settings.app_version} ({settings.main_email_username})"}

    try:
        with _api_lock:
            elapsed = time.monotonic() - _last_call_time
            if elapsed < 1.0:
                time.sleep(1.0 - elapsed)
            try:
                response = requests.get(base_url, params=params, headers=headers, timeout=5)
            finally:
                # Record the attempt even when it raises (e.g. a timeout) so a
                # failed request still counts towards the one-second spacing.
                _last_call_time = time.monotonic()
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"Nominatim API error: {str(e)}") from e

    if not data:
        # Callers rely on ValueError to distinguish "no match" from a transport error.
        raise ValueError(f"No results found for: {query}")

    result = data[0]
    return float(result["lat"]), float(result["lon"]), result.get("address", {})

50 

51 

def _emit_geocode_message(logger: logging.Logger | None, message: str, *, warning: bool = False) -> None:
    """Send ``message`` to ``logger`` (info or warning level), or print it when no logger is given."""
    if logger is None:
        print(message)
    elif warning:
        logger.warning(message)
    else:
        logger.info(message)


def _persist_geolocation(db: Session, geo: Geolocation) -> Geolocation:
    """Add, commit and refresh ``geo``; roll the session back and re-raise if the commit fails."""
    db.add(geo)
    try:
        db.commit()
    except Exception:
        # A failed commit leaves the session in a state that rejects further
        # work until rollback() is called, so restore it before propagating.
        db.rollback()
        raise
    db.refresh(geo)
    return geo


def geocode_location(
    query: str | dict,
    db: Session,
    logger: logging.Logger | None = None,
) -> Geolocation | None:
    """Return the Geolocation record for a query, using cached rows when available.

    The query is normalised (HTML entities decoded, surrounding whitespace
    stripped; dict values are joined with ", ") and looked up in the
    ``Geolocation`` table. On a cache miss the geocoding API is called and the
    result is stored. When the API reports no results, a record holding only
    the query string is stored so the same query is not sent to the API again.

    :param query: A location query string, or a dict whose non-empty values are
        joined in iteration order to form the query string.
    :param db: SQLAlchemy session used for the cache lookup and for storing new records.
    :param logger: Optional logger; when omitted, messages are printed instead.
    :return: The cached or newly created Geolocation record, or None if
        geocoding failed with an error other than "no results".
    """

    # Decode HTML entities and normalise whitespace
    if isinstance(query, dict):
        parts = []
        for value in query.values():
            if not value:
                continue
            # Non-string values (e.g. a numeric postcode) are stringified so
            # that the join below cannot fail with a TypeError.
            text = html.unescape(value).strip() if isinstance(value, str) else str(value)
            if text:
                # Skip values that were whitespace-only to avoid empty segments.
                parts.append(text)
        sanitised_query = ", ".join(parts)
    else:
        sanitised_query = html.unescape(query).strip()

    # Check cache first
    cached = db.query(Geolocation).filter_by(query=sanitised_query).first()
    if cached:
        return cached

    try:
        lat, lon, address_dict = call_geocoding_api(sanitised_query)

        # Map the API's country name onto the canonical name from COUNTRIES
        # (case-insensitive); None when there is no match.
        oms_country = address_dict.get("country")
        matched_country = None
        if oms_country:
            wanted = oms_country.lower()
            matched_country = next(
                (country["name"] for country in COUNTRIES if country["name"].lower() == wanted),
                None,
            )

        new_geo = _persist_geolocation(
            db,
            Geolocation(
                query=sanitised_query,
                latitude=lat,
                longitude=lon,
                data=address_dict,
                postcode=address_dict.get("postcode"),
                city=address_dict.get("town") or address_dict.get("city"),
                country=matched_country,
            ),
        )
        _emit_geocode_message(logger, f"Successfully geocode '{sanitised_query}' to '{new_geo.id}'")
        return new_geo

    # If no result was found, store the query string to avoid repeated calls to the API
    except ValueError:
        new_geo = _persist_geolocation(db, Geolocation(query=sanitised_query))
        _emit_geocode_message(
            logger,
            f"Failed to geocode '{sanitised_query}'. Stored in '{new_geo.id}'",
            warning=True,
        )
        return new_geo

    except Exception as e:
        print(f"Warning: Could not geocode '{sanitised_query}': {e}")
        print(traceback.format_exc())
        _emit_geocode_message(
            logger,
            f"Failed to geocode '{sanitised_query}' due to error {e}",
            warning=True,
        )
        return None