Coverage for backend/app/eis/models.py: 100%

47 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-09-22 15:38 +0000

1"""Email Ingestion System (EIS) Database Models 

2 

3Defines SQLAlchemy ORM models for email-based job scraping functionality. 

4Includes models for job alert emails, extracted job IDs, and scraped job data 

5with associated companies and locations from external sources.""" 

6 

7from sqlalchemy import Column, String, Boolean, ForeignKey, Integer, DateTime, Float, TIMESTAMP, Table, UniqueConstraint 

8from sqlalchemy.orm import relationship 

9from sqlalchemy.sql import expression 

10 

11from app.models import Base, CommonBase, Owned 

12 

13 

14# ------------------------------------------------------ MAPPINGS ------------------------------------------------------ 

15 

16 

# Association table backing the many-to-many link between job alert emails and
# scraped jobs. The two columns together form the composite primary key, and
# each foreign key is declared with ON DELETE CASCADE.
email_scrapedjob_mapping = Table(
    "email_scrapedjob_mapping",
    Base.metadata,
    Column(
        "email_id",
        Integer,
        ForeignKey("job_alert_email.id", ondelete="CASCADE"),
        primary_key=True,
    ),
    Column(
        "job_id",
        Integer,
        ForeignKey("scraped_job.id", ondelete="CASCADE"),
        primary_key=True,
    ),
)

23 

24 

25# -------------------------------------------------------- DATA -------------------------------------------------------- 

26 

27 

class JobAlertEmail(Owned, Base):
    """Email message carrying job information (e.g. LinkedIn or Indeed job alerts).

    Attributes:
    -----------
    - `external_email_id` (str): Identifier of the email message; required and unique.
    - `subject` (str, optional): Subject line of the email.
    - `sender` (str, optional): Sender of the email.
    - `date_received` (datetime, optional): Timezone-aware timestamp of when the email was received.
    - `platform` (str, optional): Platform the email came from (LinkedIn, Indeed, etc.).
    - `body` (str, optional): Body of the email.

    Foreign keys:
    -------------
    - `service_log_id` (int, optional): References `eis_service_log.id`; declared with
      ON DELETE SET NULL.

    Relationships:
    --------------
    - `jobs` (list of ScrapedJob): Jobs linked to this email through `email_scrapedjob_mapping`.
    - `service_log` (EisServiceLog): Service log entry this email is attached to.

    Note: any further columns are inherited from `Owned` / `Base`, which are defined in
    `app.models` and not described here.
    """

    # Email data
    external_email_id = Column(String, nullable=False, unique=True)
    subject = Column(String, nullable=True)
    sender = Column(String, nullable=True)
    date_received = Column(TIMESTAMP(timezone=True), nullable=True)
    platform = Column(String, nullable=True)
    body = Column(String, nullable=True)

    # Foreign keys
    service_log_id = Column(
        Integer,
        ForeignKey("eis_service_log.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Relationships
    jobs = relationship(
        "ScrapedJob",
        back_populates="emails",
        secondary=email_scrapedjob_mapping,
    )
    service_log = relationship("EisServiceLog", back_populates="emails")

62 

63 

class ScrapedJob(Owned, Base):
    """Job posting scraped from an external source, plus scraping metadata.

    Attributes:
    -----------
    - `external_job_id` (str): Identifier of the job posting; required.
    - `is_scraped` (bool): Whether the job has been scraped; server default is false.
    - `is_failed` (bool): Whether scraping the job failed; server default is false.
    - `scrape_error` (str, optional): Error message if scraping failed.
    - `scrape_datetime` (datetime, optional): Timezone-aware scrape timestamp.
    - `is_active` (bool): Whether the job is active; server default is true.
    - `title` (str, optional): Title of the job.
    - `description` (str, optional): Description of the job.
    - `salary_min` (float, optional): Minimum salary of the job.
    - `salary_max` (float, optional): Maximum salary of the job.
    - `url` (str, optional): URL of the job posting.
    - `deadline` (datetime, optional): Timezone-aware deadline of the job.
    - `company` (str, optional): Company name of the job.
    - `location` (str, optional): Location of the job.

    Relationships:
    --------------
    - `emails` (list of JobAlertEmail): Emails linked to this job through
      `email_scrapedjob_mapping`.

    Constraints:
    ------------
    - `unique_job_per_owner`: the pair (`external_job_id`, `owner_id`) must be unique.
      `owner_id` is not declared in this class; presumably it comes from the `Owned`
      mixin (verify in `app.models`).
    """

    # Scraping state
    external_job_id = Column(String, nullable=False)
    is_scraped = Column(Boolean, server_default=expression.false(), nullable=False)
    is_failed = Column(Boolean, server_default=expression.false(), nullable=False)
    scrape_error = Column(String, nullable=True)
    scrape_datetime = Column(TIMESTAMP(timezone=True), nullable=True)
    is_active = Column(Boolean, server_default=expression.true(), nullable=False)

    # Job data
    title = Column(String, nullable=True)
    description = Column(String, nullable=True)
    salary_min = Column(Float, nullable=True)
    salary_max = Column(Float, nullable=True)
    url = Column(String, nullable=True)
    deadline = Column(TIMESTAMP(timezone=True), nullable=True)
    company = Column(String, nullable=True)
    location = Column(String, nullable=True)

    # Relationships
    emails = relationship(
        "JobAlertEmail",
        back_populates="jobs",
        secondary=email_scrapedjob_mapping,
    )

    # Constraints
    __table_args__ = (
        UniqueConstraint(
            "external_job_id",
            "owner_id",
            name="unique_job_per_owner",
        ),
    )

109 

110 

class EisServiceLog(CommonBase, Base):
    """Log entry describing one service run and its outcome.

    Attributes:
    -----------
    - `name` (str): Name of the service; required.
    - `run_duration` (float, optional): Duration of the service run.
    - `run_datetime` (datetime): Date and time of the service run; required.
    - `is_success` (bool, optional): Whether the service run was successful.
    - `error_message` (str, optional): Error message if the service run failed;
      left NULL when no error is recorded.
    - `job_success_n` (int): Number of successfully scraped jobs; defaults to 0.
    - `job_fail_n` (int): Number of jobs that failed to scrape; defaults to 0.
    - `users_processed_n` (int): Number of users processed; defaults to 0.
    - `emails_found_n` (int): Number of email messages found; defaults to 0.
    - `emails_saved_n` (int): Number of email messages saved; defaults to 0.
    - `jobs_extracted_n` (int): Number of jobs extracted; defaults to 0.
    - `linkedin_job_n` (int): Number of LinkedIn jobs extracted; defaults to 0.
    - `indeed_job_n` (int): Number of Indeed jobs extracted; defaults to 0.

    Relationships:
    --------------
    - `emails` (list of JobAlertEmail): Email messages associated with this service run.
    """

    name = Column(String, nullable=False)
    run_duration = Column(Float, nullable=True)
    # NOTE(review): plain DateTime here, while the other models in this module use
    # TIMESTAMP(timezone=True) - confirm whether a timezone-naive value is intended.
    run_datetime = Column(DateTime, nullable=False)
    is_success = Column(Boolean, nullable=True)
    # Optional free-text error. This was previously declared with an integer
    # default (0) and NOT NULL, which put a non-string placeholder into a String
    # column on every run without an error. It now mirrors
    # `ScrapedJob.scrape_error`. An existing database needs a migration that
    # drops the NOT NULL constraint on this column.
    error_message = Column(String, nullable=True)

    # Counters: never NULL, start at 0 when not supplied.
    job_success_n = Column(Integer, default=0, nullable=False)
    job_fail_n = Column(Integer, default=0, nullable=False)
    users_processed_n = Column(Integer, default=0, nullable=False)
    emails_found_n = Column(Integer, default=0, nullable=False)
    emails_saved_n = Column(Integer, default=0, nullable=False)
    jobs_extracted_n = Column(Integer, default=0, nullable=False)
    linkedin_job_n = Column(Integer, default=0, nullable=False)
    indeed_job_n = Column(Integer, default=0, nullable=False)

    # Relationships
    emails = relationship("JobAlertEmail", back_populates="service_log")