Coverage for backend / app / job_email_scraping / job_scrapers / linkedin.py: 39%

23 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-17 21:34 +0000

1"""LinkedIn Job Scraper using Brightdata""" 

2 

3from app.job_email_scraping.job_scrapers.brightdata import BrightdataJobScraper 

4from app.job_email_scraping.schemas import Salary, JobInfo, JobResult 

5 

6 

class LinkedinBrightdataJobScraper(BrightdataJobScraper):
    """LinkedIn job scraper built on top of BrightdataJobScraper."""

    base_url = "https://www.linkedin.com/jobs/view/"
    name = "linkedin"
    # NOTE(review): presumably consumed by the Brightdata base class when polling
    # for results (interval between polls / maximum number of polls) - confirm there.
    poll_interval: int | float = 2
    max_attempts: int = 60

    # Literal marker that can trail the scraped "job_summary" text.
    _SUMMARY_TRAILER = "Show more Show less"

    @classmethod
    def _clean_summary(cls, summary: str | None) -> str | None:
        """Remove the trailing "Show more Show less" marker from a job summary
        :param summary: raw job summary text, or None when it is missing
        :return: cleaned summary, or None if nothing is left"""

        if not summary:
            return None
        # str.strip(chars) treats its argument as a *set of characters* and would
        # eat legitimate leading/trailing letters of the description, so the
        # marker is removed as a whole substring instead.
        cleaned = summary.strip().removesuffix(cls._SUMMARY_TRAILER).strip()
        return cleaned or None

    def _process_job_data(self, job_data: dict) -> JobResult:
        """Process the job json data to extract relevant information
        :param job_data: job data json
        :return: JobResult built from the job data"""

        min_amount = max_amount = None
        salary_currency = None
        base_salary = job_data.get("base_salary")
        if base_salary:
            payment_period = base_salary.get("payment_period")

            # Only extract salary if it's yearly
            if payment_period and payment_period.lower() == "yr":
                min_amount = base_salary.get("min_amount")
                max_amount = base_salary.get("max_amount")
                salary_currency = base_salary.get("currency")

        return JobResult(
            company=job_data.get("company_name"),
            company_id=job_data.get("company_id"),
            location=job_data.get("job_location"),
            job=JobInfo(
                title=job_data.get("job_title"),
                # A present-but-null "job_summary" is handled by _clean_summary
                description=self._clean_summary(job_data.get("job_summary")),
                url=job_data.get("url"),
                salary=Salary(
                    min_amount=min_amount,
                    max_amount=max_amount,
                    currency=salary_currency,
                ),
            ),
            raw=str(job_data),
        )

49 

50 

if __name__ == "__main__":
    # Manual run: build a scraper for a single hard-coded identifier
    # (presumably a LinkedIn job ID - confirm against the base class),
    # scrape it and print the first returned item.
    linkedin_scraper = LinkedinBrightdataJobScraper(["4313361652"])
    scraped_jobs = linkedin_scraper.scrape_job()
    print(scraped_jobs[0])