Coverage for backend / app / job_email_scraping / job_scrapers / linkedin.py: 39%
23 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-17 21:34 +0000
1"""LinkedIn Job Scraper using Brightdata"""
3from app.job_email_scraping.job_scrapers.brightdata import BrightdataJobScraper
4from app.job_email_scraping.schemas import Salary, JobInfo, JobResult
class LinkedinBrightdataJobScraper(BrightdataJobScraper):
    """LinkedIn job scraper built on top of ``BrightdataJobScraper``.

    Only the LinkedIn-specific settings and the mapping from the raw job
    payload to a ``JobResult`` live here; everything else is inherited.
    """

    base_url = "https://www.linkedin.com/jobs/view/"
    name = "linkedin"
    # NOTE(review): presumably consumed by the base class when polling for
    # results -- confirm units and semantics in BrightdataJobScraper.
    poll_interval: int | float = 2
    max_attempts: int = 60

    @staticmethod
    def _clean_description(summary: str | None) -> str | None:
        """Remove the trailing "Show more" / "Show less" markers from a summary.

        :param summary: raw job summary text, or None when absent
        :return: the cleaned text, or None when nothing is left
        """
        text = (summary or "").strip()
        # str.strip(chars) treats its argument as a *set of characters*, so
        # strip("Show more Show less") would also eat legitimate leading and
        # trailing letters of the description. Remove the literal suffixes
        # instead, last marker first.
        text = text.removesuffix("Show less").rstrip()
        text = text.removesuffix("Show more").rstrip()
        return text or None

    def _process_job_data(self, job_data: dict) -> JobResult:
        """Convert one raw job payload into a ``JobResult``.

        The salary range is only populated when the payload's
        ``base_salary.payment_period`` is ``"yr"`` (case-insensitive);
        otherwise all salary fields are left as None.

        :param job_data: job data json
        :return: JobResult holding the company, location, job details and
            the stringified raw payload
        """
        min_amount = max_amount = salary_currency = None

        base_salary = job_data.get("base_salary")
        if base_salary:
            payment_period = base_salary.get("payment_period")

            # Only extract salary if it's yearly
            if payment_period and payment_period.lower() == "yr":
                min_amount = base_salary.get("min_amount")
                max_amount = base_salary.get("max_amount")
                salary_currency = base_salary.get("currency")

        return JobResult(
            company=job_data.get("company_name"),
            company_id=job_data.get("company_id"),
            location=job_data.get("job_location"),
            job=JobInfo(
                title=job_data.get("job_title"),
                # A present-but-null "job_summary" is handled by the helper.
                description=self._clean_description(job_data.get("job_summary")),
                url=job_data.get("url"),
                salary=Salary(
                    min_amount=min_amount,
                    max_amount=max_amount,
                    currency=salary_currency,
                ),
            ),
            raw=str(job_data),
        )
if __name__ == "__main__":
    # Manual smoke run: scrape a single hard-coded LinkedIn job id and
    # print the first result returned by the scraper.
    results = LinkedinBrightdataJobScraper(["4313361652"]).scrape_job()
    print(results[0])