Coverage for backend / app / job_email_scraping / email_parsers / veganjobs.py: 94%

34 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-17 21:34 +0000

1"""VeganJobs job email parser""" 

2 

3import re 

4 

5from app.job_email_scraping.email_parsers.utils import Platform 

6from app.job_email_scraping.schemas import JobInfo, JobResult 

7 

8BASE_URL = "https://veganjobs.com/job/" 

9 

10 

def parse_veganjobs_email(body: str) -> list[JobResult]:
    """Parse VeganJobs alert email and extract job information.

    The body is split on rulers of 20 or more ``=`` characters and only the
    text following the first ruler (up to the next ruler, if any) is scanned.
    Each job entry is expected to look like::

        [Employment Type - ]Job Title
        Location: <location>
        Company: <company>
        View Details: https://veganjobs.com/job/<slug>/

    The optional prefix match is greedy, so the captured title is the text
    after the last `` - `` on the first line of the entry.

    Entries whose URL does not yield a job id (for example an extra path
    segment after the slug) are skipped instead of aborting the whole parse.

    :param str body: email body (plain text)
    :return: list of JobResult objects containing job information; empty when
        the body has no separator or no entry matches"""

    # Split by job separators (==========================)
    parts = re.split(r"={20,}", body)
    if len(parts) < 2:
        return []

    # The jobs section is the second part (index 1)
    jobs_section = parts[1]

    # Pattern to match jobs with optional employment type prefix
    job_pattern = re.compile(
        r"(?:^|\n)(?:[^\n]+ - )?([^\n]+)\n"
        r"Location: ([^\n]+)\n"
        r"Company: ([^\n]+)\n"
        r"View Details: (https://veganjobs\.com/job/[^\s\)]+)",
        re.MULTILINE,
    )
    # URL format: https://veganjobs.com/job/company-location-title/
    job_id_pattern = re.compile(r"veganjobs\.com/job/([^/]+)/?$")

    jobs = []
    for match in job_pattern.finditer(jobs_section):
        title, location, company, url = (group.strip() for group in match.groups())

        id_match = job_id_pattern.search(url)
        if id_match is None:
            # Without an id there is nothing to append to BASE_URL; the
            # previous code fell through to ``BASE_URL + None`` here and
            # raised TypeError, discarding every job in the email.
            continue
        job_id = id_match.group(1)

        job_info = JobInfo(title=title, raw_url=url, url=BASE_URL + job_id)
        jobs.append(
            JobResult(
                company=company,
                job_id=job_id,
                location=location,
                job=job_info,
                platform=Platform.VEGANJOBS,
            )
        )

    return jobs

64 

65 

66def extract_alert_name(alert_string: str) -> str | None: 

67 """Extract alert title from VeganJobs job alert email strings. 

68 :param str alert_string: alert string from email 

69 :return: extracted job title or None if not found""" 

70 

71 # Pattern: Extract text between quotes 

72 pattern = r'"([^"]+)"' 

73 match = re.search(pattern, alert_string) 

74 if match: 

75 return match.group(1).strip() 

76 

77 return None