Coverage for backend / app / job_email_scraping / email_parsers / utils.py: 86%
22 statements
coverage.py v7.13.5, created at 2026-03-17 21:34 +0000
1"""Utility functions for email parsers"""
3from enum import Enum
5from bs4 import BeautifulSoup
8def process_salary(salary_str: str) -> int | None:
9 """Convert salary string with K/M suffix to numeric value.
10 :param salary_str: Salary string with optional K/M suffix
11 :return: Numeric salary value or None"""
13 if not salary_str:
14 return None
16 # Remove any whitespace and commas
17 salary_str = salary_str.strip().replace(",", "")
19 # Check for K (thousands)
20 if salary_str.endswith("K"):
21 return int(float(salary_str[:-1]) * 1000)
22 else:
23 # Already a plain number
24 try:
25 return int(float(salary_str))
26 except:
27 return None
class Platform(str, Enum):
    """Enumeration of the job source platforms.

    Members also subclass ``str``, so each one compares equal to its raw
    string value."""

    INDEED = "indeed"
    LINKEDIN = "linkedin"
    VEGANJOBS = "veganjobs"
    NHS = "nhs"
def remove_style_tags(body: str) -> str:
    """Remove <style> tags from HTML content.

    Every ``<style>`` element, including its contents, is dropped from the
    parsed document before it is serialised back to a string.

    :param str body: HTML content
    :return: HTML content without <style> tags"""
    # from_encoding only applies to byte input; with str markup Beautiful Soup
    # ignores it and emits a warning, so pass it only when bytes are supplied
    parser_kwargs = {"from_encoding": "utf-8"} if isinstance(body, (bytes, bytearray)) else {}
    soup = BeautifulSoup(body, "html.parser", **parser_kwargs)
    for style in soup.find_all("style"):
        style.decompose()
    return str(soup)