Coverage for backend / app / job_email_scraping / schemas.py: 100%
162 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-17 21:34 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-17 21:34 +0000
1"""Pydantic schemas for the Job Email Scraping.
2Contains data models for job alert emails, scraped job postings, and service logs
3used in the external job scraping and notification system."""
5import datetime as dt
6from datetime import datetime
8from pydantic import field_validator, Field
10from app.base_schemas import BaseModel, OwnedOut, Out, serialise_relationships
11from app.data_tables.schemas import GeolocationOut
12from app.job_rating.schemas import JobRatingOut
15# --------------------------------------------------- JOB ALERT EMAIL --------------------------------------------------
class JobEmail(BaseModel):
    """Base schema for a job alert email pulled from a user's mailbox.

    Every field is nullable so partially parsed emails can still be stored;
    only ``external_email_id`` lacks a default and must be supplied (it may
    still be ``None``).
    """

    external_email_id: str | None  # provider-side message id; required key, nullable
    subject: str | None = None
    sender: str | None = None
    date_received: datetime | None = None  # NOTE(review): tz-awareness not enforced here — confirm upstream
    platform: str | None = None  # presumably the job board that sent the alert — verify against scraper
    body: str | None = None  # raw email body text
    service_log_id: int | None = None  # presumably FK to JobEmailScrapingServiceLog — confirm against model
    job_found_n: int | None = 0  # count of job postings detected in this email
    alert_name: str | None = None
class JobEmailUpdate(JobEmail):
    """Job Alert Email update schema — currently identical to the base schema."""

    pass
class JobEmailOut(JobEmail, OwnedOut):
    """Job Alert Email output schema: base fields plus ownership and related job IDs."""

    jobs: list[int]  # IDs of scraped jobs linked to this email

    @field_validator("jobs", mode="before")
    @classmethod
    def serialize_relationships(cls, value) -> list[int]:
        """Collapse ORM relationship objects into a plain list of integer IDs.

        Runs in ``before`` mode so raw ORM objects are converted prior to
        Pydantic's own ``list[int]`` validation.
        """
        return serialise_relationships(value)
51# ----------------------------------------------------- SCRAPED JOB ----------------------------------------------------
class ScrapedJob(BaseModel):
    """Scraped Job base schema.

    Combines scraping bookkeeping (status flags, retry state, error log) with
    the job posting's own data. ``external_job_id``, ``platform`` and
    ``service_log_id`` are required; everything else defaults to an empty/
    inactive state.
    """

    external_job_id: str  # platform-side job identifier
    platform: str
    service_log_id: int  # presumably FK to the service-log run that found this job — confirm
    is_processed: bool = False
    is_scraped: bool = False
    is_failed: bool = False
    scrape_datetime: datetime | None = None
    # Pydantic deep-copies mutable defaults per instance, so `[]` is safe here
    # (unlike a plain-function mutable default argument).
    scrape_error: list[dict] = []
    is_active: bool = True
    is_imported: bool = False
    is_skipped: bool = False
    skip_reason: str | None = None
    retry_count: int = 0
    next_retry_at: datetime | None = None

    # Job data
    title: str | None = None
    description: str | None = None
    salary_min: float | None = None
    salary_max: float | None = None
    salary_currency: str | None = None
    url: str | None = None
    deadline: datetime | None = None
    parsed_location: str | None = None
    attendance_type: str | None = None  # presumably remote/hybrid/on-site — confirm allowed values
    is_closed: bool = False
    location: str | None = None
    location_city: str | None = None
    location_postcode: str | None = None
    location_country: str | None = None
    company: str | None = None
class ScrapedJobUpdate(BaseModel):
    """Scraped Job update schema.

    Deliberately narrow: clients may only toggle the active/imported flags;
    all other scraped-job fields are owned by the scraping service.
    """

    is_active: bool | None = None
    is_imported: bool | None = None
class ScrapedJobOut(ScrapedJob, OwnedOut):
    """Scraped Job output schema: base fields plus ownership and relationships."""

    emails: list[int]  # IDs of the job alert emails this job appeared in
    job_rating: JobRatingOut | None  # nested rating, if one exists
    geolocation: GeolocationOut | None  # nested geolocation, if resolved

    @field_validator("emails", mode="before")
    @classmethod
    def serialize_relationships(cls, value) -> list[int]:
        """Collapse ORM relationship objects into a plain list of integer IDs."""
        return serialise_relationships(value)
class PaginatedScrapedJobResponse(BaseModel):
    """Paginated Scraped Job response schema."""

    items: list[ScrapedJobOut]  # the current page of results
    total: int  # total rows before filtering
    total_filtered: int  # total rows after filters are applied
    page: int
    page_size: int
    total_pages: int
class PaginatedJobEmailResponse(BaseModel):
    """Paginated Job Email response schema (same envelope shape as the scraped-job one)."""

    items: list[JobEmailOut]  # the current page of results
    total: int  # total rows before filtering
    total_filtered: int  # total rows after filters are applied
    page: int
    page_size: int
    total_pages: int
134# ----------------------------------------------------- SERVICE LOG ----------------------------------------------------
class JobEmailScrapingServiceLogOut(Out):
    """Job Email Scraping Service Log output schema.

    One record per service run: timing, outcome, per-entity counters and the
    IDs of every related row. The ``platform_stats`` / ``service_errors``
    forward references are resolved by Pydantic once those classes are
    defined later in this module.
    """

    run_datetime: datetime | None = None
    run_duration: float | None = None  # presumably seconds — confirm against the service
    is_success: bool | None = None
    error_message: str | None = None

    # Users
    user_found_ids: list[int] = []
    user_processed_ids: list[int] = []

    # Emails
    email_found_n: int = 0
    email_saved_n: int = 0
    email_skipped_n: int = 0

    # Jobs
    job_found_n: int = 0
    job_to_process_n: int = 0
    job_scrape_succeeded_n: int = 0
    job_scrape_failed_n: int = 0
    job_scrape_copied_n: int = 0
    job_scrape_skipped_n: int = 0

    # Relationships
    emails: list[int]  # collapsed to IDs by the validator below
    scraped_jobs: list[int]  # collapsed to IDs by the validator below
    platform_stats: list["JobEmailScrapingPlatformStatOut"]
    service_errors: list["JobEmailScrapingServiceErrorOut"]

    @field_validator("emails", "scraped_jobs", mode="before")
    @classmethod
    def serialize_relationships(cls, value) -> list[int]:
        """Collapse ORM relationship objects into a plain list of integer IDs."""
        return serialise_relationships(value)
175# --------------------------------------------------- PLATFORM STATS ---------------------------------------------------
class JobEmailScrapingPlatformStatOut(Out):
    """Job Email Scraping Platform Stat output schema.

    Per-platform breakdown of a single service run; stores raw ID lists
    rather than counts (counts live on the parent service log).
    """

    name: str | None = None  # platform name

    # Jobs
    job_found_ids: list[int] = []
    job_to_process_ids: list[int] = []
    job_scrape_succeeded_ids: list[int] = []
    job_scrape_failed_ids: list[int] = []
    job_scrape_copied_ids: list[int] = []
    job_scrape_skipped_ids: list[int] = []

    # Emails
    email_saved_ids: list[int] = []
    email_skipped_ids: list[int] = []

    service_log_id: int | None = None  # presumably FK to the parent service log — confirm
198# --------------------------------------------- JOB SCRAPING SERVICE ERROR ---------------------------------------------
class JobEmailScrapingServiceErrorOut(Out):
    """Job Email Scraping Service Error output schema (one captured exception)."""

    error_type: str  # presumably the exception class name — confirm against the service
    message: str
    traceback: str  # full formatted traceback text
209# ------------------------------------------------ EMAIL SCRAPER SERVICE -----------------------------------------------
class JobEmailScrapingStartRequest(BaseModel):
    """Start Request schema for the email scraper service.

    Both knobs are optional; defaults below are what the service uses when
    the caller sends nothing.
    """

    period_hours: float | None = 3.0  # presumably how far back to scan for emails — confirm
    timedelta_days: int | None = 1
219# ------------------------------------------------- SCRAPED JOB FILTER -------------------------------------------------
class ScrapingFilterCreate(BaseModel):
    """Scraped Job Filter creation schema.

    ``type``/``value``/``operator`` are free-form strings here; allowed
    values are presumably validated elsewhere — confirm against the service.
    """

    type: str  # NOTE(review): shadows the builtin `type`, but as a Pydantic field name this is intentional
    value: str
    operator: str
    is_active: bool = True
    case_sensitive: bool = False
class ScrapingFilterUpdate(ScrapingFilterCreate):
    """Scraped Job Filter update schema.

    Re-declares the required creation fields as optional so partial updates
    can omit them; inherits the flag fields unchanged.
    """

    type: str | None = None
    value: str | None = None
    operator: str | None = None
# NOTE(review): base order (OwnedOut first) differs from the other Out schemas in
# this module (e.g. JobEmailOut(JobEmail, OwnedOut)); this affects MRO and field
# ordering — confirm whether the difference is intentional before normalizing.
class ScrapingFilterOut(OwnedOut, ScrapingFilterCreate):
    """Scraped Job Filter output schema: filter fields plus ownership and matches."""

    filtered_jobs: list[int]  # IDs of scraped jobs this filter has matched

    @field_validator("filtered_jobs", mode="before")
    @classmethod
    def serialize_relationships(cls, value) -> list[int]:
        """Collapse ORM relationship objects into a plain list of integer IDs."""
        return serialise_relationships(value)
252# ------------------------------------------- FORWARDING CONFIRMATION LINK ---------------------------------------------
class ForwardingConfirmationLinkOut(OwnedOut):
    """Forwarding Confirmation Link output schema."""

    url: str  # the confirmation URL to visit
    platform: str  # platform the forwarding confirmation belongs to
class ForwardingConfirmationLinkUpdate(BaseModel):
    """Forwarding Confirmation Link update schema — only the used flag is mutable."""

    is_used: bool
class Salary(BaseModel):
    """Salary range extracted from a job posting; all parts optional."""

    min_amount: float | None = None
    max_amount: float | None = None
    currency: str | None = None  # currency string as scraped; format not validated here
class JobInfo(BaseModel):
    """Details of a single scraped job posting, as returned by a platform scraper.

    ``salary`` uses ``default_factory`` so each instance gets its own
    ``Salary`` object rather than sharing one default.
    """

    title: str | None = None
    description: str | None = None
    url: str | None = None
    raw_url: str | None = None  # presumably the unprocessed variant of `url` — confirm with scraper
    # Use the module's bare `datetime` import, consistent with every other
    # schema in this file (previously the redundant `dt.datetime` alias).
    deadline: datetime | None = None
    salary: Salary = Field(default_factory=Salary)
    is_closed: bool = False
class JobResult(BaseModel):
    """Envelope returned by a platform scraper for one job posting.

    Identification/context fields are optional; the nested ``job`` payload is
    required (``JobInfo`` itself defaults every field, so an empty result is
    still representable).
    """

    platform: str | None = None
    job_id: str | None = None  # platform-side job identifier
    company: str | None = None
    company_id: str | None = None  # platform-side company identifier
    location: str | None = None
    raw: str | None = None  # raw scraped payload — presumably HTML/JSON text; confirm with scraper
    job: JobInfo  # required nested job details