Coverage for backend/app/job_email_scraping/routers/scraped_job.py: 78%
64 statements
1"""FastAPI routers for the job email scraping service endpoints.
3Provides REST API endpoints for managing job alert emails, scraped job postings,
4and service execution logs with CRUD operations and admin access controls."""
6import datetime as dt
7from typing import Literal
9from fastapi import Depends, HTTPException
10from sqlalchemy import asc, desc, or_
11from sqlalchemy.orm import Session, joinedload
12from starlette import status
13from starlette.requests import Request
15from app import models
16from app.core.oauth2 import get_current_user
17from app.database import get_db
18from app.job_email_scraping import schemas
19from app.routers.utility import generate_data_table_crud_router, filter_query


# GET endpoint for admin users to retrieve all scraped jobs
scraped_job_router = generate_data_table_crud_router(
    table_model=models.ScrapedJob,
    out_schema=schemas.ScrapedJobOut,
    endpoint="scraped-jobs",
    not_found_msg="Scraped Job not found",
    allowed_actions=["get_all"],
    admin_only=True,
)
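
# NOTE (assumption, not verified against generate_data_table_crud_router itself): with
# allowed_actions=["get_all"] and admin_only=True this call presumably registers a single
# admin-only listing route (e.g. GET /scraped-jobs) that returns every ScrapedJob row
# serialized with schemas.ScrapedJobOut.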


@scraped_job_router.get("/paged", response_model=schemas.PaginatedScrapedJobResponse)
def get_all(
    request: Request,
    db: Session = Depends(get_db),
    current_user: models.User = Depends(get_current_user),
    page: int = 0,
    page_size: int = 10,
    sort_by: str = "scrape_datetime",
    sort_direction: Literal["asc", "desc"] = "desc",
    show_past_deadline: bool = False,
    since_last_login: bool = False,
    search: str | None = None,
) -> dict:
    """Retrieve paginated scraped jobs for the current user that have not been
    imported, are still active, and were not caught by an exclusion filter.

    :param request: Request object (also used to read extra query-string filters)
    :param db: Database session
    :param current_user: Current authenticated user
    :param page: Zero-based page number
    :param page_size: Number of items per page
    :param sort_by: Sort key (a ScrapedJob column or "job_rating.<attribute>")
    :param sort_direction: Sort direction ("asc" or "desc")
    :param show_past_deadline: Also include jobs whose deadline has passed or that are closed
    :param since_last_login: Only include jobs created since the user's previous login
    :param search: Free-text search applied to title, company, location, description and platform
    """

    # Base query with eager loading of job_rating
    query = (
        db.query(models.ScrapedJob)
        .options(joinedload(models.ScrapedJob.job_rating))  # Always load rating
        .filter(models.ScrapedJob.owner_id == current_user.id)
        .filter(models.ScrapedJob.is_imported.is_(False))
        .filter(models.ScrapedJob.is_active.is_(True))
        .filter(models.ScrapedJob.exclusion_filter_id.is_(None))
    )

    total = query.count()

    if not show_past_deadline:
        query = query.filter(
            or_(
                models.ScrapedJob.deadline.is_(None),
                models.ScrapedJob.deadline >= dt.datetime.now(dt.timezone.utc),
            ),
            models.ScrapedJob.is_closed.is_(False),
        )

    if since_last_login and current_user.previous_login:
        query = query.filter(models.ScrapedJob.created_at >= current_user.previous_login)

    # Apply search filter
    if search:
        search_term = f"%{search}%"
        query = query.filter(
            or_(
                models.ScrapedJob.title.ilike(search_term),
                models.ScrapedJob.company.ilike(search_term),
                models.ScrapedJob.location.ilike(search_term),
                models.ScrapedJob.description.ilike(search_term),
                models.ScrapedJob.platform.ilike(search_term),
            )
        )

    # Apply any remaining query parameters as column filters
    filter_params = dict(request.query_params)
    filter_params.pop("page", None)
    filter_params.pop("page_size", None)
    filter_params.pop("sort_by", None)
    filter_params.pop("sort_direction", None)
    filter_params.pop("search", None)
    filter_params.pop("show_past_deadline", None)
    filter_params.pop("since_last_login", None)
    query = filter_query(query, models.ScrapedJob, filter_params)
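    # Illustrative example (the exact matching behaviour of filter_query is assumed, not shown here):
    # a request like GET /scraped-jobs/paged?page=1&platform=LinkedIn&company=Acme reaches
    # filter_query with filter_params == {"platform": "LinkedIn", "company": "Acme"}, since the
    # pagination/sorting/search parameters above have already been popped out.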

    # Apply sorting
    if sort_by.startswith("job_rating."):
        # Handle sorting by job_rating relationship attributes
        rating_attribute = sort_by.split(".", 1)[1]  # e.g., "overall_score"

        if hasattr(models.JobRating, rating_attribute):
            # Need explicit join for ORDER BY to work
            query = query.outerjoin(models.JobRating)
            sort_column = getattr(models.JobRating, rating_attribute)

            if sort_direction == "desc":
                query = query.order_by(desc(sort_column).nulls_last())
            else:
                query = query.order_by(asc(sort_column).nulls_last())
        else:
            # Default sorting if invalid column
            query = query.order_by(desc(models.ScrapedJob.scrape_datetime).nulls_last())
    elif hasattr(models.ScrapedJob, sort_by):
        sort_column = getattr(models.ScrapedJob, sort_by)
        if sort_direction == "desc":
            query = query.order_by(desc(sort_column).nulls_last())
        else:
            query = query.order_by(asc(sort_column).nulls_last())
    else:
        # Default sorting if invalid column
        query = query.order_by(desc(models.ScrapedJob.scrape_datetime).nulls_last())

    # Get total count after filters, before pagination
    total_filtered = query.count()

    # Calculate pagination
    offset = page * page_size
    total_pages = (total_filtered + page_size - 1) // page_size if total_filtered > 0 else 1

    # Apply pagination
    results = query.offset(offset).limit(page_size).all()

    return {
        "items": results,
        "total": total,
        "total_filtered": total_filtered,
        "page": page,
        "page_size": page_size,
        "total_pages": total_pages,
    }
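
# Example (illustrative; assumes the router is mounted under a "scraped-jobs" prefix):
#   GET /scraped-jobs/paged?page=0&page_size=10&sort_by=job_rating.overall_score&sort_direction=desc
# returns a PaginatedScrapedJobResponse-shaped payload such as
#   {"items": [...], "total": 42, "total_filtered": 17, "page": 0, "page_size": 10, "total_pages": 2}
# where "total" counts all of the user's non-imported, active, non-excluded jobs and
# "total_filtered" counts only those that also pass the deadline/search/extra filters.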


@scraped_job_router.get("/by-email/{email_id}", response_model=list[schemas.ScrapedJobOut])
def get_scraped_jobs_by_email(
    email_id: int,
    current_user: models.User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Get scraped jobs associated with a specific job email for the current user.

    :param email_id: ID of the job email
    :param current_user: Current authenticated user
    :param db: Database session
    :return: List of scraped jobs linked to the email
    """
    email = (
        db.query(models.JobEmail)
        .filter(models.JobEmail.id == email_id)
        .filter(models.JobEmail.owner_id == current_user.id)
        .first()
    )
    if not email:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Job email not found")
    return email.jobs
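
# Illustrative call (prefix assumed): GET /scraped-jobs/by-email/7 returns the jobs parsed from
# job email 7, or 404 if that email does not exist or belongs to another user.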


@scraped_job_router.get("/filtered-by-filter/{filter_id}", response_model=list[schemas.ScrapedJobOut])
def get_scraped_jobs_filtered_by_filter(
    filter_id: int,
    current_user: models.User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Get scraped jobs excluded by a specific filter for the current user.

    :param filter_id: ID of the exclusion filter
    :param current_user: Current authenticated user
    :param db: Database session
    :return: List of scraped jobs excluded by the filter
    """
    scraped_jobs = (
        db.query(models.ScrapedJob)
        .filter(models.ScrapedJob.owner_id == current_user.id)
        .filter(models.ScrapedJob.exclusion_filter_id == filter_id)
        .all()
    )
    return scraped_jobs
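
# Illustrative call (prefix assumed): GET /scraped-jobs/filtered-by-filter/3 returns the current
# user's jobs that were excluded by filter 3 (exclusion_filter_id == 3).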


# PUT endpoint for regular users to update scraped job entries
generate_data_table_crud_router(
    table_model=models.ScrapedJob,
    update_schema=schemas.ScrapedJobUpdate,
    out_schema=schemas.ScrapedJobOut,
    endpoint="scraped-jobs",
    not_found_msg="Scraped Job not found",
    allowed_actions=["put"],
    router=scraped_job_router,
)
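

# Minimal usage sketch (assumptions: the FastAPI app object lives in app.main and includes
# scraped_job_router without an additional prefix; neither is shown in this file):
#
#   from fastapi.testclient import TestClient
#   from app.main import app
#
#   client = TestClient(app)
#   resp = client.get(
#       "/scraped-jobs/paged",
#       params={"page": 0, "page_size": 5, "search": "engineer"},
#       headers={"Authorization": f"Bearer {token}"},  # token obtained via the project's auth flow
#   )
#   assert resp.status_code == 200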