Coverage for backend/app/job_email_scraping/email_parsers/veganjobs.py

1"""VeganJobs job email parser"""

3import re

5from app.job_email_scraping.email_parsers.utils import Platform

6from app.job_email_scraping.schemas import JobInfo, JobResult

8BASE_URL = "https://veganjobs.com/job/"

def parse_veganjobs_email(body: str) -> list[JobResult]:
    """Parse VeganJobs alert email and extract job information.

    Only the text between the first and the second ``=====`` separator is
    scanned for jobs. Each job is expected to be laid out as::

        [Employment Type - ]Job Title
        Location: Location
        Company: Company Name
        View Details: URL

    :param str body: email body (plain text)
    :return: list of JobResult objects containing job information; empty if the
        body contains no separator"""

    # Split by job separators (a run of 20 or more "=" characters)
    parts = re.split(r"={20,}", body)
    if len(parts) < 2:
        return []

    # The jobs section is the second part (index 1)
    jobs_section = parts[1]

    # Pattern to match jobs with optional employment type prefix
    job_pattern = (
        r"(?:^|\n)(?:[^\n]+ - )?([^\n]+)\n"
        r"Location: ([^\n]+)\n"
        r"Company: ([^\n]+)\n"
        r"View Details: (https://veganjobs\.com/job/[^\s\)]+)"
    )

    # URL format: https://veganjobs.com/job/company-location-title/
    # Loop-invariant, so it is defined once outside the loop.
    job_id_pattern = r"veganjobs\.com/job/([^/]+)/?$"

    jobs = []
    for match in re.finditer(job_pattern, jobs_section, re.MULTILINE):
        title = match.group(1).strip()
        location = match.group(2).strip()
        company = match.group(3).strip()
        url = match.group(4).strip()

        # Extract job_id from URL
        id_match = re.search(job_id_pattern, url)
        if id_match:
            job_id = id_match.group(1)
            processed_url = BASE_URL + job_id
        else:
            # The job pattern accepts URLs the id pattern rejects (e.g. extra
            # path segments or a query string after the trailing slash).
            # Concatenating None would raise TypeError and lose every job in
            # the email, so keep the raw URL and leave the id unset instead.
            job_id = None
            processed_url = url

        job_info = JobInfo(title=title, raw_url=url, url=processed_url)
        jobs.append(
            JobResult(
                company=company,
                job_id=job_id,
                location=location,
                job=job_info,
                platform=Platform.VEGANJOBS,
            )
        )

    return jobs

66def extract_alert_name(alert_string: str) -> str | None:

67 """Extract alert title from VeganJobs job alert email strings.

68 :param str alert_string: alert string from email

69 :return: extracted job title or None if not found"""

71 # Pattern: Extract text between quotes

72 pattern = r'"([^"]+)"'

73 match = re.search(pattern, alert_string)

74 if match:

75 return match.group(1).strip()

77 return None

Coverage for backend / app / job_email_scraping / email_parsers / veganjobs.py: 94%

34 statements