Coverage for backend / app / job_email_scraping / email_parsers / utils.py: 86%

22 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-17 21:34 +0000

1"""Utility functions for email parsers""" 

2 

3from enum import Enum 

4 

5from bs4 import BeautifulSoup 

6 

7 

8def process_salary(salary_str: str) -> int | None: 

9 """Convert salary string with K/M suffix to numeric value. 

10 :param salary_str: Salary string with optional K/M suffix 

11 :return: Numeric salary value or None""" 

12 

13 if not salary_str: 

14 return None 

15 

16 # Remove any whitespace and commas 

17 salary_str = salary_str.strip().replace(",", "") 

18 

19 # Check for K (thousands) 

20 if salary_str.endswith("K"): 

21 return int(float(salary_str[:-1]) * 1000) 

22 else: 

23 # Already a plain number 

24 try: 

25 return int(float(salary_str)) 

26 except: 

27 return None 

28 

29 

class Platform(str, Enum):
    """Identifiers of the job sources known to the email parsers.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``Platform.INDEED == "indeed"``)."""

    # Member order is part of the iteration order of the enum.
    INDEED = "indeed"
    LINKEDIN = "linkedin"
    VEGANJOBS = "veganjobs"
    NHS = "nhs"

37 

38 

def remove_style_tags(body: str) -> str:
    """Remove <style> tags from HTML content.

    :param str body: HTML content
    :return: HTML content without <style> tags"""

    # Beautiful Soup ignores ``from_encoding`` for already-decoded text and
    # warns about it, so the UTF-8 hint is only supplied for raw bytes.
    parser_kwargs = {"from_encoding": "utf-8"} if isinstance(body, bytes) else {}
    soup = BeautifulSoup(body, "html.parser", **parser_kwargs)

    # decompose() detaches each <style> element, including its contents,
    # from the parsed tree before it is serialised again.
    for style in soup.find_all("style"):
        style.decompose()
    return str(soup)