Coverage for backend/app/eis/models.py: 100%
47 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-22 15:38 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-22 15:38 +0000
"""Email Ingestion System (EIS) Database Models

Defines SQLAlchemy ORM models for email-based job scraping functionality.
Includes models for job alert emails, extracted job IDs, and scraped job data
with associated companies and locations from external sources."""
7from sqlalchemy import Column, String, Boolean, ForeignKey, Integer, DateTime, Float, TIMESTAMP, Table, UniqueConstraint
8from sqlalchemy.orm import relationship
9from sqlalchemy.sql import expression
11from app.models import Base, CommonBase, Owned
# ------------------------------------------------------ MAPPINGS ------------------------------------------------------
# Association table for the many-to-many link between JobAlertEmail and
# ScrapedJob (used as `secondary=` by both relationships). The two foreign keys
# together form the composite primary key, and ON DELETE CASCADE removes a link
# row when either the email or the job it points to is deleted.
email_scrapedjob_mapping = Table(
    "email_scrapedjob_mapping",
    Base.metadata,
    Column(
        "email_id",
        Integer,
        ForeignKey("job_alert_email.id", ondelete="CASCADE"),
        primary_key=True,
    ),
    Column(
        "job_id",
        Integer,
        ForeignKey("scraped_job.id", ondelete="CASCADE"),
        primary_key=True,
    ),
)
# -------------------------------------------------------- DATA --------------------------------------------------------
class JobAlertEmail(Owned, Base):
    """Email message carrying job information, such as LinkedIn or Indeed job alerts.

    Attributes:
    -----------
    - `external_email_id` (str): Identifier of the email message; required and unique.
    - `subject` (str, optional): Subject line of the email.
    - `sender` (str, optional): Sender of the email.
    - `date_received` (datetime, optional): Timezone-aware timestamp of when the email was received.
    - `platform` (str, optional): Platform the email came from (LinkedIn, Indeed, etc.).
    - `body` (str, optional): Body of the email.

    Foreign keys:
    -------------
    - `service_log_id` (int, optional): Id of the EisServiceLog row associated with the email.

    Relationships:
    --------------
    - `jobs` (list of ScrapedJob): Scraped jobs linked to the email through `email_scrapedjob_mapping`.
    - `service_log` (EisServiceLog): Service log entry associated with the email."""

    external_email_id = Column(String, nullable=False, unique=True)
    subject = Column(String, nullable=True)
    sender = Column(String, nullable=True)
    date_received = Column(TIMESTAMP(timezone=True), nullable=True)
    platform = Column(String, nullable=True)
    body = Column(String, nullable=True)

    # Foreign keys
    # SET NULL: deleting the service log keeps the email and only clears the link.
    service_log_id = Column(
        Integer,
        ForeignKey("eis_service_log.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Relationships
    jobs = relationship(
        "ScrapedJob",
        back_populates="emails",
        secondary=email_scrapedjob_mapping,
    )
    service_log = relationship("EisServiceLog", back_populates="emails")
class ScrapedJob(Owned, Base):
    """Job posting scraped from an external source, together with its scraping status.

    Attributes:
    -----------
    - `external_job_id` (str): Identifier of the job posting; required, unique per owner.
    - `is_scraped` (bool): Whether the job has been scraped (database default: false).
    - `is_failed` (bool): Whether scraping the job failed (database default: false).
    - `scrape_error` (str, optional): Error message if scraping failed.
    - `scrape_datetime` (datetime, optional): Timezone-aware timestamp associated with the scrape.
    - `is_active` (bool): Whether the job is active (database default: true).
    - `title` (str, optional): Title of the job.
    - `description` (str, optional): Description of the job.
    - `salary_min` (float, optional): Minimum salary of the job.
    - `salary_max` (float, optional): Maximum salary of the job.
    - `url` (str, optional): URL of the job posting.
    - `deadline` (datetime, optional): Timezone-aware deadline of the job.
    - `company` (str, optional): Company name of the job.
    - `location` (str, optional): Location of the job.

    Relationships:
    --------------
    - `emails` (list of JobAlertEmail): Emails linked to the job through `email_scrapedjob_mapping`."""

    # Scraping status; the boolean defaults are applied by the database (server_default).
    external_job_id = Column(String, nullable=False)
    is_scraped = Column(Boolean, server_default=expression.false(), nullable=False)
    is_failed = Column(Boolean, server_default=expression.false(), nullable=False)
    scrape_error = Column(String, nullable=True)
    scrape_datetime = Column(TIMESTAMP(timezone=True), nullable=True)
    is_active = Column(Boolean, server_default=expression.true(), nullable=False)

    # Job data
    title = Column(String, nullable=True)
    description = Column(String, nullable=True)
    salary_min = Column(Float, nullable=True)
    salary_max = Column(Float, nullable=True)
    url = Column(String, nullable=True)
    deadline = Column(TIMESTAMP(timezone=True), nullable=True)
    company = Column(String, nullable=True)
    location = Column(String, nullable=True)

    # Relationships
    emails = relationship(
        "JobAlertEmail",
        back_populates="jobs",
        secondary=email_scrapedjob_mapping,
    )

    # Constraints
    # The same external job id may appear once per owner.
    # `owner_id` is not declared in this class; presumably it comes from the Owned mixin -- confirm.
    __table_args__ = (
        UniqueConstraint("external_job_id", "owner_id", name="unique_job_per_owner"),
    )
class EisServiceLog(CommonBase, Base):
    """Represents logs of service operations and their status.

    Attributes:
    -----------
    - `name` (str): Name of the service.
    - `run_duration` (float, optional): Duration of the service run.
    - `run_datetime` (datetime): Date and time of the service run.
    - `is_success` (bool, optional): Indicates whether the service run was successful.
    - `error_message` (str): Error message if the service run failed; defaults to an empty string.
    - `job_success_n` (int): Number of successful jobs scraped; defaults to 0.
    - `job_fail_n` (int): Number of failed jobs scraped; defaults to 0.
    - `users_processed_n` (int): Number of users processed; defaults to 0.
    - `emails_found_n` (int): Number of email messages found; defaults to 0.
    - `emails_saved_n` (int): Number of email messages saved; defaults to 0.
    - `jobs_extracted_n` (int): Number of jobs extracted; defaults to 0.
    - `linkedin_job_n` (int): Number of LinkedIn jobs extracted; defaults to 0.
    - `indeed_job_n` (int): Number of Indeed jobs extracted; defaults to 0.

    Relationships:
    --------------
    - `emails` (list of JobAlertEmail): List of email messages associated with the service."""

    name = Column(String, nullable=False)
    run_duration = Column(Float, nullable=True)
    # NOTE(review): plain DateTime (no timezone), unlike the TIMESTAMP(timezone=True)
    # columns on JobAlertEmail and ScrapedJob -- confirm this is intentional.
    run_datetime = Column(DateTime, nullable=False)
    is_success = Column(Boolean, nullable=True)
    # The default must be a string: this is a String column. The previous integer
    # default (0) was a copy-paste from the counter columns below. An empty string
    # keeps the column NOT NULL, so no schema change is required.
    # NOTE(review): if "no error" should be NULL instead, switch to nullable=True
    # together with a migration.
    error_message = Column(String, default="", nullable=False)

    # Counters: filled with 0 by the ORM on insert when not provided.
    job_success_n = Column(Integer, default=0, nullable=False)
    job_fail_n = Column(Integer, default=0, nullable=False)
    users_processed_n = Column(Integer, default=0, nullable=False)
    emails_found_n = Column(Integer, default=0, nullable=False)
    emails_saved_n = Column(Integer, default=0, nullable=False)
    jobs_extracted_n = Column(Integer, default=0, nullable=False)
    linkedin_job_n = Column(Integer, default=0, nullable=False)
    indeed_job_n = Column(Integer, default=0, nullable=False)

    emails = relationship("JobAlertEmail", back_populates="service_log")