Coverage for backend / app / job_email_scraping / schemas.py: 100%

162 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-17 21:34 +0000

1"""Pydantic schemas for the Job Email Scraping. 

2Contains data models for job alert emails, scraped job postings, and service logs 

3used in the external job scraping and notification system.""" 

4 

5import datetime as dt 

6from datetime import datetime 

7 

8from pydantic import field_validator, Field 

9 

10from app.base_schemas import BaseModel, OwnedOut, Out, serialise_relationships 

11from app.data_tables.schemas import GeolocationOut 

12from app.job_rating.schemas import JobRatingOut 

13 

14 

15# --------------------------------------------------- JOB ALERT EMAIL -------------------------------------------------- 

16 

17 

class JobEmail(BaseModel):
    """Base schema with the fields shared by the job alert email schemas.

    ``external_email_id`` is the only field declared without a default, so it
    must always be supplied (``None`` is an accepted value). Every other field
    may be omitted.
    """

    # Required, but nullable.
    external_email_id: str | None

    # Optional fields; each falls back to ``None`` when not provided.
    subject: str | None = None
    sender: str | None = None
    date_received: datetime | None = None
    platform: str | None = None
    body: str | None = None
    service_log_id: int | None = None

    # Falls back to 0 instead of ``None``; ``None`` is still accepted.
    job_found_n: int | None = 0

    alert_name: str | None = None

30 

31 

class JobEmailUpdate(JobEmail):
    """Update schema for a job alert email.

    Declares nothing of its own: the accepted fields are exactly those
    inherited from ``JobEmail``.
    """

36 

37 

class JobEmailOut(JobEmail, OwnedOut):
    """Output schema for a job alert email.

    Combines the ``JobEmail`` fields with those of ``OwnedOut`` and adds the
    ``jobs`` relationship, exposed as a list of integers.
    """

    jobs: list[int]

    @field_validator("jobs", mode="before")
    @classmethod
    def serialize_relationships(cls, value) -> list[int]:
        """Convert the raw ``jobs`` value to a list of IDs.

        Registered with ``mode="before"``, so it receives the incoming value
        ahead of the ``list[int]`` validation. The conversion itself is
        delegated to ``serialise_relationships``.
        """
        job_ids = serialise_relationships(value)
        return job_ids

49 

50 

51# ----------------------------------------------------- SCRAPED JOB ---------------------------------------------------- 

52 

53 

class ScrapedJob(BaseModel):
    """Base schema for a scraped job.

    Only ``external_job_id``, ``platform`` and ``service_log_id`` are
    required. All other fields carry a default.
    """

    # Required fields (no default).
    external_job_id: str
    platform: str
    service_log_id: int

    # Status / bookkeeping fields, all defaulted.
    is_processed: bool = False
    is_scraped: bool = False
    is_failed: bool = False
    scrape_datetime: datetime | None = None
    scrape_error: list[dict] = []
    is_active: bool = True
    is_imported: bool = False
    is_skipped: bool = False
    skip_reason: str | None = None
    retry_count: int = 0
    next_retry_at: datetime | None = None

    # Job data: every field is optional except ``is_closed``, which is a
    # plain bool defaulting to False.
    title: str | None = None
    description: str | None = None
    salary_min: float | None = None
    salary_max: float | None = None
    salary_currency: str | None = None
    url: str | None = None
    deadline: datetime | None = None
    parsed_location: str | None = None
    attendance_type: str | None = None
    is_closed: bool = False
    location: str | None = None
    location_city: str | None = None
    location_postcode: str | None = None
    location_country: str | None = None
    company: str | None = None

88 

89 

class ScrapedJobUpdate(BaseModel):
    """Update schema for a scraped job.

    Derives from ``BaseModel`` rather than ``ScrapedJob``, so only the two
    flags declared here are part of the schema. Both are optional.
    """

    # ``None`` is the default for both flags.
    is_active: bool | None = None
    is_imported: bool | None = None

95 

96 

class ScrapedJobOut(ScrapedJob, OwnedOut):
    """Output schema for a scraped job.

    Combines the ``ScrapedJob`` fields with those of ``OwnedOut`` and adds
    three related values. None of the three has a default, so all must be
    present; ``job_rating`` and ``geolocation`` may be ``None``.
    """

    emails: list[int]
    job_rating: JobRatingOut | None
    geolocation: GeolocationOut | None

    @field_validator("emails", mode="before")
    @classmethod
    def serialize_relationships(cls, value) -> list[int]:
        """Convert the raw ``emails`` value to a list of IDs.

        Registered with ``mode="before"``, so it receives the incoming value
        ahead of the ``list[int]`` validation. The conversion itself is
        delegated to ``serialise_relationships``.
        """
        email_ids = serialise_relationships(value)
        return email_ids

110 

111 

class PaginatedScrapedJobResponse(BaseModel):
    """Paginated response schema carrying ``ScrapedJobOut`` items.

    Every field is required; none declares a default.
    """

    # Entries of the page being returned.
    items: list[ScrapedJobOut]

    # Integer pagination counters.
    total: int
    total_filtered: int
    page: int
    page_size: int
    total_pages: int

121 

122 

class PaginatedJobEmailResponse(BaseModel):
    """Paginated response schema carrying ``JobEmailOut`` items.

    Every field is required; none declares a default.
    """

    # Entries of the page being returned.
    items: list[JobEmailOut]

    # Integer pagination counters.
    total: int
    total_filtered: int
    page: int
    page_size: int
    total_pages: int

132 

133 

134# ----------------------------------------------------- SERVICE LOG ---------------------------------------------------- 

135 

136 

class JobEmailScrapingServiceLogOut(Out):
    """Output schema for a job email scraping service log.

    The run fields and all counters are defaulted. The four relationship
    fields at the bottom have no default and are therefore required.
    """

    # Run information, all optional.
    run_datetime: datetime | None = None
    run_duration: float | None = None
    is_success: bool | None = None
    error_message: str | None = None

    # Users
    user_found_ids: list[int] = []
    user_processed_ids: list[int] = []

    # Emails
    email_found_n: int = 0
    email_saved_n: int = 0
    email_skipped_n: int = 0

    # Jobs
    job_found_n: int = 0
    job_to_process_n: int = 0
    job_scrape_succeeded_n: int = 0
    job_scrape_failed_n: int = 0
    job_scrape_copied_n: int = 0
    job_scrape_skipped_n: int = 0

    # Relationships
    emails: list[int]
    scraped_jobs: list[int]
    # Referenced by name because both classes are defined later in this module.
    platform_stats: list["JobEmailScrapingPlatformStatOut"]
    service_errors: list["JobEmailScrapingServiceErrorOut"]

    @field_validator("emails", "scraped_jobs", mode="before")
    @classmethod
    def serialize_relationships(cls, value) -> list[int]:
        """Convert a raw ``emails`` or ``scraped_jobs`` value to a list of IDs.

        Registered with ``mode="before"`` for those two fields only;
        ``platform_stats`` and ``service_errors`` do not pass through it.
        The conversion itself is delegated to ``serialise_relationships``.
        """
        related_ids = serialise_relationships(value)
        return related_ids

173 

174 

175# --------------------------------------------------- PLATFORM STATS --------------------------------------------------- 

176 

177 

class JobEmailScrapingPlatformStatOut(Out):
    """Output schema for a job email scraping platform stat.

    Every field is defaulted: the ID lists default to an empty list, and
    ``name`` and ``service_log_id`` default to ``None``.
    """

    name: str | None = None

    # Jobs
    job_found_ids: list[int] = []
    job_to_process_ids: list[int] = []
    job_scrape_succeeded_ids: list[int] = []
    job_scrape_failed_ids: list[int] = []
    job_scrape_copied_ids: list[int] = []
    job_scrape_skipped_ids: list[int] = []

    # Emails
    email_saved_ids: list[int] = []
    email_skipped_ids: list[int] = []

    service_log_id: int | None = None

196 

197 

198# --------------------------------------------- JOB SCRAPING SERVICE ERROR --------------------------------------------- 

199 

200 

class JobEmailScrapingServiceErrorOut(Out):
    """Output schema for a job email scraping service error.

    All three string fields are required and non-nullable.
    """

    error_type: str
    message: str
    traceback: str

207 

208 

209# ------------------------------------------------ EMAIL SCRAPER SERVICE ----------------------------------------------- 

210 

211 

class JobEmailScrapingStartRequest(BaseModel):
    """Request schema used to start the email scraper service.

    Both parameters may be omitted or set to ``None``. When omitted,
    ``period_hours`` defaults to 3.0 and ``timedelta_days`` to 1.
    """

    period_hours: float | None = 3.0
    timedelta_days: int | None = 1

217 

218 

219# ------------------------------------------------- SCRAPED JOB FILTER ------------------------------------------------- 

220 

221 

class ScrapingFilterCreate(BaseModel):
    """Creation schema for a scraped job filter.

    ``type``, ``value`` and ``operator`` are required strings; the two
    boolean flags are defaulted.
    """

    # Required fields (no default).
    type: str
    value: str
    operator: str

    # Flags with defaults.
    is_active: bool = True
    case_sensitive: bool = False

230 

231 

class ScrapingFilterUpdate(ScrapingFilterCreate):
    """Update schema for a scraped job filter.

    Redeclares the three fields that ``ScrapingFilterCreate`` requires so
    that they become optional. ``is_active`` and ``case_sensitive`` are
    inherited unchanged, including their defaults.
    """

    # Overrides of the parent's required fields: nullable, default ``None``.
    type: str | None = None
    value: str | None = None
    operator: str | None = None

238 

239 

class ScrapingFilterOut(OwnedOut, ScrapingFilterCreate):
    """Output schema for a scraped job filter.

    Combines the ``OwnedOut`` fields with those of ``ScrapingFilterCreate``
    and adds the required ``filtered_jobs`` relationship, exposed as a list
    of integers.
    """

    filtered_jobs: list[int]

    @field_validator("filtered_jobs", mode="before")
    @classmethod
    def serialize_relationships(cls, value) -> list[int]:
        """Convert the raw ``filtered_jobs`` value to a list of IDs.

        Registered with ``mode="before"``, so it receives the incoming value
        ahead of the ``list[int]`` validation. The conversion itself is
        delegated to ``serialise_relationships``.
        """
        filtered_job_ids = serialise_relationships(value)
        return filtered_job_ids

250 

251 

252# ------------------------------------------- FORWARDING CONFIRMATION LINK --------------------------------------------- 

253 

254 

class ForwardingConfirmationLinkOut(OwnedOut):
    """Output schema for a forwarding confirmation link.

    Adds two required string fields to those inherited from ``OwnedOut``.
    """

    url: str
    platform: str

260 

261 

class ForwardingConfirmationLinkUpdate(BaseModel):
    """Update schema for a forwarding confirmation link.

    ``is_used`` is the only field, and it is required.
    """

    is_used: bool

266 

267 

class Salary(BaseModel):
    """Salary schema: optional minimum amount, maximum amount and currency.

    Every field defaults to ``None``, so the model can be built with no
    arguments.
    """

    min_amount: float | None = None
    max_amount: float | None = None
    currency: str | None = None

272 

273 

class JobInfo(BaseModel):
    """Job information schema.

    Every field is defaulted, so the model can be built with no arguments.
    """

    title: str | None = None
    description: str | None = None
    url: str | None = None
    raw_url: str | None = None
    deadline: datetime | None = None

    # A new ``Salary`` is created per instance when no salary is supplied.
    salary: Salary = Field(default_factory=Salary)

    is_closed: bool = False

282 

283 

class JobResult(BaseModel):
    """Job result schema wrapping a ``JobInfo`` with optional string fields.

    ``job`` is the only required field; all the others default to ``None``.
    """

    # Optional string fields.
    platform: str | None = None
    job_id: str | None = None
    company: str | None = None
    company_id: str | None = None
    location: str | None = None
    raw: str | None = None

    # Required nested job information (no default).
    job: JobInfo