Coverage report for backend/app/job_email_scraping/routers/scraped_job.py: 78% (64 statements) — coverage.py v7.13.5, created at 2026-03-17 21:34 +0000. « prev · ^ index · » next

1"""FastAPI routers for the job email scraping service endpoints. 

2 

3Provides REST API endpoints for managing job alert emails, scraped job postings, 

4and service execution logs with CRUD operations and admin access controls.""" 

5 

6import datetime as dt 

7from typing import Literal 

8 

9from fastapi import Depends, HTTPException 

10from sqlalchemy import asc, desc, or_ 

11from sqlalchemy.orm import Session, joinedload 

12from starlette import status 

13from starlette.requests import Request 

14 

15from app import models 

16from app.core.oauth2 import get_current_user 

17from app.database import get_db 

18from app.job_email_scraping import schemas 

19from app.routers.utility import generate_data_table_crud_router, filter_query 

20 

21 

# GET endpoint for admin user to get all scraped jobs.
# This call also creates the router object that every other scraped-job
# route below is attached to (via decorators or the `router=` kwarg).
scraped_job_router = generate_data_table_crud_router(
    table_model=models.ScrapedJob,
    out_schema=schemas.ScrapedJobOut,
    endpoint="scraped-jobs",
    not_found_msg="Scraped Job not found",
    allowed_actions=["get_all"],
    admin_only=True,
)

31 

32 

@scraped_job_router.get("/paged", response_model=schemas.PaginatedScrapedJobResponse)
def get_all(
    request: Request,
    db: Session = Depends(get_db),
    current_user: models.User = Depends(get_current_user),
    page: int = 0,
    page_size: int = 10,
    sort_by: str = "scrape_datetime",
    sort_direction: Literal["asc", "desc"] = "desc",
    show_past_deadline: bool = False,
    since_last_login: bool = False,
    search: str | None = None,
) -> dict:
    """Retrieve paginated scraped jobs for the current user that have not been
    imported, are active, and were not caught by an exclusion filter.

    :param request: Request (extra query params are harvested as column filters)
    :param db: Database session
    :param current_user: Current user
    :param page: Zero-based page number
    :param page_size: Page size (must be >= 1)
    :param sort_by: Sort key; a ScrapedJob column name or "job_rating.<attr>"
    :param sort_direction: Sort direction
    :param show_past_deadline: Also include jobs past their deadline or closed
    :param since_last_login: Only show jobs created since the previous login
    :param search: Case-insensitive substring matched across text columns
    :raises HTTPException: 422 when pagination parameters are invalid
    :return: Dict matching PaginatedScrapedJobResponse
    """
    # Guard: page_size < 1 would raise ZeroDivisionError in total_pages (HTTP 500);
    # reject bad pagination input explicitly instead.
    if page < 0 or page_size < 1:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="page must be >= 0 and page_size must be >= 1",
        )

    # Base query with eager loading of job_rating (always needed by the out schema).
    query = (
        db.query(models.ScrapedJob)
        .options(joinedload(models.ScrapedJob.job_rating))  # Always load rating
        .filter(models.ScrapedJob.owner_id == current_user.id)
        .filter(models.ScrapedJob.is_imported.is_(False))
        .filter(models.ScrapedJob.is_active.is_(True))
        # .is_(None) instead of "== None": consistent with the filters above and
        # removes the need for a noinspection pragma.
        .filter(models.ScrapedJob.exclusion_filter_id.is_(None))
    )

    # Unfiltered total, taken before deadline/search/column filters are applied.
    total = query.count()

    if not show_past_deadline:
        # Keep only open jobs whose deadline is unset or still in the future.
        query = query.filter(
            or_(
                models.ScrapedJob.deadline.is_(None),
                models.ScrapedJob.deadline >= dt.datetime.now(dt.timezone.utc),
            ),
            models.ScrapedJob.is_closed.is_(False),
        )

    if since_last_login and current_user.previous_login:
        query = query.filter(models.ScrapedJob.created_at >= current_user.previous_login)

    # Apply free-text search across the most useful text columns.
    if search:
        search_term = f"%{search}%"
        query = query.filter(
            or_(
                models.ScrapedJob.title.ilike(search_term),
                models.ScrapedJob.company.ilike(search_term),
                models.ScrapedJob.location.ilike(search_term),
                models.ScrapedJob.description.ilike(search_term),
                models.ScrapedJob.platform.ilike(search_term),
            )
        )

    # Any query params not claimed by this endpoint are treated as column filters.
    filter_params = dict(request.query_params)
    for reserved in (
        "page",
        "page_size",
        "sort_by",
        "sort_direction",
        "search",
        "show_past_deadline",
        "since_last_login",
    ):
        filter_params.pop(reserved, None)
    query = filter_query(query, models.ScrapedJob, filter_params)

    # Apply sorting; unknown sort keys fall back to newest-scraped-first.
    order = desc if sort_direction == "desc" else asc
    default_order = desc(models.ScrapedJob.scrape_datetime).nulls_last()
    if sort_by.startswith("job_rating."):
        # Sorting by a job_rating relationship attribute, e.g. "job_rating.overall_score".
        rating_attribute = sort_by.split(".", 1)[1]
        if hasattr(models.JobRating, rating_attribute):
            # An explicit join is required for ORDER BY on the related table.
            query = query.outerjoin(models.JobRating)
            query = query.order_by(
                order(getattr(models.JobRating, rating_attribute)).nulls_last()
            )
        else:
            query = query.order_by(default_order)
    elif hasattr(models.ScrapedJob, sort_by):
        query = query.order_by(order(getattr(models.ScrapedJob, sort_by)).nulls_last())
    else:
        query = query.order_by(default_order)

    # Total after all filters, used for the pagination metadata.
    total_filtered = query.count()

    # Ceiling division; report at least one page even when there are no rows.
    offset = page * page_size
    total_pages = (total_filtered + page_size - 1) // page_size if total_filtered > 0 else 1

    results = query.offset(offset).limit(page_size).all()

    return {
        "items": results,
        "total": total,
        "total_filtered": total_filtered,
        "page": page,
        "page_size": page_size,
        "total_pages": total_pages,
    }

152 

153 

@scraped_job_router.get("/by-email/{email_id}", response_model=list[schemas.ScrapedJobOut])
def get_scraped_jobs_by_email(
    email_id: int,
    current_user: models.User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Return the scraped jobs linked to one of the current user's job emails.

    :param email_id: ID of the job email
    :param current_user: Current authenticated user
    :param db: Database session
    :raises HTTPException: 404 if no such email belongs to the current user
    :return: List of scraped jobs linked to the email"""

    # Ownership is enforced in the query itself: an email belonging to another
    # user is indistinguishable from a missing one (both yield 404).
    owned_email = (
        db.query(models.JobEmail)
        .filter(
            models.JobEmail.id == email_id,
            models.JobEmail.owner_id == current_user.id,
        )
        .first()
    )
    if owned_email is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Job email not found")
    return owned_email.jobs

175 

176 

@scraped_job_router.get("/filtered-by-filter/{filter_id}", response_model=list[schemas.ScrapedJobOut])
def get_scraped_jobs_filtered_by_filter(
    filter_id: int,
    current_user: models.User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Return the current user's scraped jobs excluded by a specific filter.

    :param filter_id: ID of the filter
    :param current_user: Current authenticated user
    :param db: Database session
    :return: List of scraped jobs whose exclusion_filter_id matches"""

    # Single chained query; both predicates are ANDed in one filter() call.
    return (
        db.query(models.ScrapedJob)
        .filter(
            models.ScrapedJob.owner_id == current_user.id,
            models.ScrapedJob.exclusion_filter_id == filter_id,
        )
        .all()
    )

196 

197 

# PUT endpoint for regular users to update the entries.
# Attaches the generated PUT action to the existing scraped_job_router so all
# scraped-job routes share one router. Note: admin_only is not passed here —
# presumably the helper's default makes this user-accessible; confirm against
# generate_data_table_crud_router's signature.
generate_data_table_crud_router(
    table_model=models.ScrapedJob,
    update_schema=schemas.ScrapedJobUpdate,
    out_schema=schemas.ScrapedJobOut,
    endpoint="scraped-jobs",
    not_found_msg="Scraped Job not found",
    allowed_actions=["put"],
    router=scraped_job_router,
)