Coverage for backend / app / demo / seed.py: 99%

124 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-17 21:34 +0000

1"""Demo data seeding and cleanup functions. 

2Reuses test data defined for user 1 (owner_id=1) with proper ID remapping.""" 

3 

4import copy 

5 

6from sqlalchemy.orm import Session 

7 

8from app import models 

9from tests.utils.create_data.utils import create_db_entries, override_properties 

10from tests.utils.test_data import data_tables 

11from tests.utils.test_data import job_rating 

12from tests.utils.test_data import job_scraping 

13from tests.utils.test_data.utils import add_mappings 

14 

15PREV_OWNER = 1 # Reuse test data from user 1 

16 

17 

def _filter_owner(data: list[dict], new_owner_id: int) -> list[dict]:
    """Deep-copy *data*, keep only PREV_OWNER's entries, and reassign ownership.

    :param data: List of dictionaries to filter and remap
    :param new_owner_id: New owner_id to set for filtered entries
    :return: Filtered list of dictionaries with new owner_id set
    """
    remapped = []
    # Deep copy first so the shared test fixtures are never mutated.
    for item in copy.deepcopy(data):
        if item.get("owner_id") != PREV_OWNER:
            continue
        item["owner_id"] = new_owner_id
        remapped.append(item)
    return remapped

25 

26 

def _build_index_map(data: list[dict]) -> dict[int, int]:
    """Build mapping from original 1-based position to filtered 1-based position
    for entries belonging to PREV_OWNER.

    :param data: List of dictionaries to build mapping for (must contain owner_id)
    :return: Mapping from original position to filtered position
    """
    # Original 1-based positions of PREV_OWNER's rows, in order of appearance.
    owned_positions = (
        pos + 1 for pos, row in enumerate(data) if row.get("owner_id") == PREV_OWNER
    )
    # Re-number them consecutively: their position within the filtered list.
    return {original: filtered for filtered, original in enumerate(owned_positions, start=1)}

40 

41 

42def _remap_with_map( 

43 data: list[dict], 

44 key: str, 

45 index_map: dict[int, int], 

46 objects: list, 

47) -> list[dict]: 

48 """Remap FK values using an index map. Drops entries with unmapped references. 

49 :param data: List of dictionaries to remap 

50 :param key: Key to remap 

51 :param index_map: Mapping from original position to filtered position 

52 :param objects: List of objects to remap against 

53 :return: Remapped list of dictionaries, with dropped entries if any references were unmapped""" 

54 

55 result = [] 

56 for entry in data: 

57 val = entry.get(key) 

58 if val is not None: 

59 if val not in index_map: 

60 continue 

61 entry[key] = objects[index_map[val] - 1].id 

62 result.append(entry) 

63 return result 

64 

65 

66def _filter_mappings( 

67 mappings: list[dict], 

68 primary_key: str, 

69 secondary_key: str, 

70 primary_map: dict[int, int], 

71 secondary_map: dict[int, int] | None = None, 

72) -> list[dict]: 

73 """Filter and remap M2M mapping data to only include user 1's entries. 

74 :param mappings: List of M2M mapping dictionaries to filter and remap 

75 :param primary_key: Key to filter by for primary table 

76 :param secondary_key: Key to filter by for secondary table 

77 :param primary_map: Mapping from original position to filtered position for primary table 

78 :param secondary_map: Optional mapping from original position to filtered position for secondary table 

79 :return: Filtered and remapped list of M2M mapping dictionaries""" 

80 

81 result = [] 

82 for mapping in mappings: 

83 if mapping[primary_key] not in primary_map: 

84 continue 

85 sec_ids = mapping[secondary_key] 

86 if secondary_map: 

87 sec_ids = [secondary_map[sid] for sid in sec_ids if sid in secondary_map] 

88 if sec_ids: 

89 result.append( 

90 { 

91 primary_key: primary_map[mapping[primary_key]], 

92 secondary_key: sec_ids, 

93 } 

94 ) 

95 return result 

96 

97 

def seed_demo_data(db: Session, user: models.User) -> None:
    """Create all demo data for a user in the demo schema.

    Clones the test fixtures owned by user 1 (PREV_OWNER), remapping every
    primary/foreign key onto freshly inserted rows. Insert order matters:
    each table is created only after the tables it references.

    :param db: Database session (bound to demo schema)
    :param user: The demo user to seed data for
    """
    owner_id = user.id

    # Geolocations — read from the demo schema, not seeded here; used only as
    # remap targets for location/scraped-job FKs below.
    geolocations = db.query(models.Geolocation).all()

    # User Qualifications — defined inline (not taken from the test fixtures).
    qualification_data = [
        {
            "owner_id": owner_id,
            "experience": "5 years of full-stack web development with Python and JavaScript frameworks. "
            "Led a team of 3 developers at a SaaS startup. Built RESTful APIs and microservices.",
            "skills": "Python, JavaScript, TypeScript, React, FastAPI, PostgreSQL, Docker, AWS, Git, CI/CD",
            "education": "BSc Computer Science, University of Manchester (2019)",
            "qualities": "Strong problem-solver, collaborative team player, detail-oriented, adaptable",
            "interests": "Backend engineering, cloud infrastructure, developer tooling, open-source",
        }
    ]
    qualifications = create_db_entries(db, models.UserQualification, qualification_data)

    # Companies
    companies = create_db_entries(db, models.Company, _filter_owner(data_tables.COMPANY_DATA, owner_id))

    # Locations (remap geolocation_id)
    location_data = override_properties(
        _filter_owner(data_tables.LOCATION_DATA, owner_id), ("geolocation_id", geolocations)
    )
    locations = create_db_entries(db, models.Location, location_data)

    # Persons (remap company_id)
    person_data = override_properties(_filter_owner(data_tables.PERSON_DATA, owner_id), ("company_id", companies))
    persons = create_db_entries(db, models.Person, person_data)

    # Keywords
    keywords = create_db_entries(db, models.Keyword, _filter_owner(data_tables.KEYWORD_DATA, owner_id))

    # Aggregators
    aggregators = create_db_entries(db, models.Aggregator, _filter_owner(data_tables.AGGREGATOR_DATA, owner_id))

    # Files
    files = create_db_entries(db, models.File, _filter_owner(data_tables.FILE_DATA, owner_id))

    # Jobs (remap multiple FKs)
    job_data = override_properties(
        _filter_owner(data_tables.JOB_DATA, owner_id),
        ("company_id", companies),
        ("location_id", locations),
        ("source_aggregator_id", aggregators),
        ("cv_id", files),
        ("cover_letter_id", files),
        ("application_aggregator_id", aggregators),
        ("recruiter_id", persons),
        ("recruitment_company_id", companies),
    )
    jobs = create_db_entries(db, models.Job, job_data)

    # Job M2M mappings (keywords, contacts). Index maps translate positions in
    # the *unfiltered* fixture lists to positions in the filtered/created lists,
    # since fixture M2M data references rows by their original 1-based position.
    job_map = _build_index_map(data_tables.JOB_DATA)
    keyword_map = _build_index_map(data_tables.KEYWORD_DATA)
    person_map = _build_index_map(data_tables.PERSON_DATA)

    add_mappings(
        jobs,
        keywords,
        _filter_mappings(data_tables.JOB_KEYWORD_MAPPINGS, "job_id", "keyword_ids", job_map, keyword_map),
        "job_id",
        "keyword_ids",
        "keywords",
    )
    add_mappings(
        jobs,
        persons,
        _filter_mappings(data_tables.JOB_CONTACT_MAPPINGS, "job_id", "person_ids", job_map, person_map),
        "job_id",
        "person_ids",
        "contacts",
    )
    db.flush()

    # Interviews (remap location_id, job_id)
    interview_data = override_properties(
        _filter_owner(data_tables.INTERVIEW_DATA, owner_id),
        ("location_id", locations),
        ("job_id", jobs),
    )
    interviews = create_db_entries(db, models.Interview, interview_data)

    # Interview ↔ Interviewer mappings
    interview_map = _build_index_map(data_tables.INTERVIEW_DATA)
    add_mappings(
        interviews,
        persons,
        _filter_mappings(
            data_tables.INTERVIEW_INTERVIEWER_MAPPINGS, "interview_id", "person_ids", interview_map, person_map
        ),
        "interview_id",
        "person_ids",
        "interviewers",
    )
    db.flush()

    # Job Application Updates (remap job_id)
    app_update_data = override_properties(
        _filter_owner(data_tables.JOB_APPLICATION_UPDATE_DATA, owner_id), ("job_id", jobs)
    )
    create_db_entries(db, models.JobApplicationUpdate, app_update_data)

    # Speculative Applications (remap company_id)
    spec_app_data = override_properties(
        _filter_owner(data_tables.SPECULATIVE_APPLICATION_DATA, owner_id), ("company_id", companies)
    )
    spec_apps = create_db_entries(db, models.SpeculativeApplication, spec_app_data)

    # Speculative application ↔ Contact mappings
    spec_app_map = _build_index_map(data_tables.SPECULATIVE_APPLICATION_DATA)
    add_mappings(
        spec_apps,
        persons,
        _filter_mappings(
            data_tables.SPECULATIVE_APPLICATION_CONTACTS_MAPPING,
            "speculative_application_id",
            "contact_ids",
            spec_app_map,
            person_map,
        ),
        "speculative_application_id",
        "contact_ids",
        "contacts",
    )
    db.flush()

    # Scraping Service Logs (not owner-scoped) — user-id lists are rewritten to
    # reference only the demo user; truthiness of the fixture value decides
    # whether the demo user appears at all.
    scraping_log_data = copy.deepcopy(job_scraping.JOB_SCRAPING_SERVICE_LOG_DATA)
    for log in scraping_log_data:
        log["user_found_ids"] = [owner_id] if log["user_found_ids"] else []
        log["user_processed_ids"] = [owner_id] if log["user_processed_ids"] else []
    scraping_logs = create_db_entries(db, models.JobEmailScrapingServiceLog, scraping_log_data)

    # Scraping Exclusion Filters
    filters = create_db_entries(
        db, models.ScrapingExclusionFilter, _filter_owner(job_scraping.SCRAPING_FILTER_DATA, owner_id)
    )

    # Job Emails (remap service_log_id, make external_email_id unique per demo user)
    email_data = override_properties(
        _filter_owner(job_scraping.JOB_EMAIL_DATA, owner_id), ("service_log_id", scraping_logs)
    )
    for entry in email_data:
        # Suffix with owner_id so multiple demo users don't collide on the
        # presumably-unique external_email_id column — TODO confirm constraint.
        entry["external_email_id"] = f"{entry['external_email_id']}_{owner_id}"
    emails = create_db_entries(db, models.JobEmail, email_data)

    # Scraped Jobs (remap service_log_id, exclusion_filter_id, geolocation_id, make external_job_id unique per demo user)
    scraped_job_data = override_properties(
        _filter_owner(job_scraping.SCRAPED_JOB_DATA, owner_id),
        ("service_log_id", scraping_logs),
        ("exclusion_filter_id", filters),
        ("geolocation_id", geolocations),
    )
    for entry in scraped_job_data:
        entry["external_job_id"] = f"{entry['external_job_id']}_{owner_id}"
    scraped_jobs = create_db_entries(db, models.ScrapedJob, scraped_job_data)

    # Email ↔ ScrapedJob mappings (uses index maps for interleaved data)
    email_map = _build_index_map(job_scraping.JOB_EMAIL_DATA)
    scraped_job_map = _build_index_map(job_scraping.SCRAPED_JOB_DATA)

    for mapping in job_scraping.EMAIL_SCRAPEDJOB_MAPPINGS:
        if mapping["email_id"] not in email_map:
            continue
        email_obj = emails[email_map[mapping["email_id"]] - 1]
        for sj_id in mapping["scraped_job_ids"]:
            if sj_id in scraped_job_map:
                email_obj.jobs.append(scraped_jobs[scraped_job_map[sj_id] - 1])
    db.flush()

    # Rating Service Logs (not owner-scoped) — job-id lists are emptied rather
    # than remapped, since fixture job IDs don't exist in the demo schema.
    rating_log_data = copy.deepcopy(job_rating.JOB_RATING_SERVICE_LOG_DATA)
    for log in rating_log_data:
        log["user_found_ids"] = [owner_id] if log["user_found_ids"] else []
        log["user_processed_ids"] = [owner_id] if log["user_processed_ids"] else []
        log["job_found_ids"] = []
        log["job_succeeded_ids"] = []
        log["job_failed_ids"] = []
        log["job_skipped_ids"] = []
    create_db_entries(db, models.JobRatingServiceLog, rating_log_data)

    # Job Ratings (remap scraped_job_id using interleaved index map, remap user_qualification_id)
    ai_system_prompt = db.query(models.AiSystemPrompt).first()
    ai_job_template = db.query(models.AiJobPromptTemplate).first()

    rating_data = _filter_owner(job_rating.JOB_RATING_DATA, owner_id)
    # Ratings whose scraped job was filtered out are dropped by _remap_with_map.
    rating_data = _remap_with_map(rating_data, "scraped_job_id", scraped_job_map, scraped_jobs)

    for entry in rating_data:
        # Remap user_qualification_id (user 1's qualifications are consecutive at start)
        qual_id = entry.get("user_qualification_id")
        if qual_id is not None and qual_id <= len(qualifications):
            entry["user_qualification_id"] = qualifications[qual_id - 1].id
        else:
            # Out-of-range or missing reference: fall back to the first
            # qualification rather than leaving a dangling FK.
            entry["user_qualification_id"] = qualifications[0].id if qualifications else None
        # Set prompt IDs from demo schema
        entry["system_prompt_id"] = ai_system_prompt.id if ai_system_prompt else None
        entry["job_prompt_template_id"] = ai_job_template.id if ai_job_template else None

    create_db_entries(db, models.JobRating, rating_data)

    db.commit()

309 

310 

def delete_user(db: Session, user_id: int) -> None:
    """Delete a user; all of their owned data is removed via cascade delete.

    :param db: Database session (bound to demo schema)
    :param user_id: The ID of the demo user to delete
    """
    target = db.query(models.User).filter(models.User.id == user_id).first()
    if target is None:
        # Nothing to do — also skips the commit, matching prior behavior.
        return
    db.delete(target)
    db.commit()