Coverage for backend / app / job_email_scraping / email_parsers / veganjobs.py: 94%
34 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-17 21:34 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-17 21:34 +0000
1"""VeganJobs job email parser"""
3import re
5from app.job_email_scraping.email_parsers.utils import Platform
6from app.job_email_scraping.schemas import JobInfo, JobResult
# URL prefix that parse_veganjobs_email joins with the extracted job id to build the processed job URL.
8BASE_URL = "https://veganjobs.com/job/"
def parse_veganjobs_email(body: str) -> list[JobResult]:
    """Parse VeganJobs alert email and extract job information.

    The body is split on separator lines made of 20 or more ``=`` characters and
    only the section that follows the first separator is scanned for jobs. Each
    job is expected as four consecutive lines::

        [Employment Type - ]Job Title
        Location: <location>
        Company: <company>
        View Details: https://veganjobs.com/job/<slug>/

    :param str body: email body (plain text)
    :return: list of JobResult objects containing job information; empty when the
        body contains no separator or no job entry matches"""
    # Split by job separators (==========================)
    parts = re.split(r"={20,}", body)
    if len(parts) < 2:
        return []

    # The jobs section is the second part (index 1)
    jobs_section = parts[1]

    # NOTE: the optional "<employment type> - " prefix is matched greedily, so a
    # title that itself contains " - " is reduced to the text after the last " - ".
    job_pattern = re.compile(
        r"(?:^|\n)(?:[^\n]+ - )?([^\n]+)\n"
        r"Location: ([^\n]+)\n"
        r"Company: ([^\n]+)\n"
        r"View Details: (https://veganjobs\.com/job/[^\s\)]+)",
        re.MULTILINE,
    )
    # URL format: https://veganjobs.com/job/company-location-title/
    # Compiled once here because it does not change between iterations.
    job_id_pattern = re.compile(r"veganjobs\.com/job/([^/]+)/?$")

    jobs = []
    for match in job_pattern.finditer(jobs_section):
        title, location, company, url = (group.strip() for group in match.groups())

        id_match = job_id_pattern.search(url)
        job_id = id_match.group(1) if id_match else None

        # The job pattern accepts URLs the id pattern rejects (extra path segments,
        # a query string after the trailing slash). Concatenating None with BASE_URL
        # used to raise TypeError and abort the whole email; fall back to the raw
        # URL so the job is still reported.
        # NOTE(review): assumes JobResult accepts job_id=None - confirm against the schema.
        processed_url = BASE_URL + job_id if job_id is not None else url

        job_info = JobInfo(title=title, raw_url=url, url=processed_url)
        jobs.append(
            JobResult(
                company=company,
                job_id=job_id,
                location=location,
                job=job_info,
                platform=Platform.VEGANJOBS,
            )
        )

    return jobs
66def extract_alert_name(alert_string: str) -> str | None:
67 """Extract alert title from VeganJobs job alert email strings.
68 :param str alert_string: alert string from email
69 :return: extracted job title or None if not found"""
71 # Pattern: Extract text between quotes
72 pattern = r'"([^"]+)"'
73 match = re.search(pattern, alert_string)
74 if match:
75 return match.group(1).strip()
77 return None