import re
from abc import ABC, abstractmethod
from datetime import date, datetime
from decimal import Decimal
from email.message import EmailMessage, Message
from email.utils import parsedate_to_datetime
from logging import info
from typing import Optional
from uuid import UUID

from bs4 import BeautifulSoup

from model import Transaction


class TransactionParser(ABC):
    """Interface for parsers that turn bank alert emails into transactions."""

    @abstractmethod
    def match(self, msg: Message) -> bool:
        """
        Determines if the given email message matches the criteria for this
        parser.

        Args:
            msg (Message): The email message to evaluate.

        Returns:
            bool: True if the message matches the parser's criteria, False
            otherwise.
        """
        pass

    @abstractmethod
    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """
        Extracts transaction details from the given email message.

        Args:
            msg (EmailMessage): The email message to parse.

        Returns:
            Optional[Transaction]: A Transaction object containing the
            extracted details, or None when the message was understood but
            no transaction should be recorded for it.

        Raises:
            TransactionParsingFailed: If the message cannot be parsed
                successfully.
        """
        pass


class TransactionParsingFailed(Exception):
    """Raised when an email cannot be turned into a transaction."""

    pass


def _body_content(msg: EmailMessage) -> str:
    """
    Returns the content of the preferred HTML (else plain-text) body part.

    Only "html" and "plain" are requested: the default preference list of
    ``get_body`` also includes "related", which can hand back a multipart
    container whose ``get_content()`` raises instead of returning text.

    Raises:
        TransactionParsingFailed: If the message has no suitable body part or
            the part has no content.
    """
    body = msg.get_body(preferencelist=("html", "plain"))
    if body is None:
        raise TransactionParsingFailed("No body of message found")

    content = body.get_content()
    if content is None:
        raise TransactionParsingFailed("No content of message found")

    return content


class RogersBankParser(TransactionParser):
    """Parses "Purchase amount alert" emails from Rogers Bank."""

    # Groups: 1 = amount (thousands separators allowed), 2 = date such as
    # "Jan 5, 2024", 3 = payee.
    EXTRACT_RE = re.compile(
        r"Attempt of \$(\d[\d,]*\.\d{2}) was made on ([A-Za-z]{3} \d{1,2}, \d{4})[^<]*at ([^<]+) in"  # noqa: E501
    )

    def __init__(self, account_id: UUID):
        """
        Args:
            account_id (UUID): Account every extracted transaction is
                assigned to.
        """
        self.account_id = account_id

    def match(self, msg: Message) -> bool:
        """Returns True when the From and Subject headers match exactly."""
        # NOTE(review): the expected From value ends in a space, which looks
        # like an address in angle brackets was lost -- confirm against a
        # real message before changing it.
        return (
            msg["From"] == "Rogers Bank "
            and msg["Subject"] == "Purchase amount alert"
        )

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """
        Extracts amount, date and payee from the message body.

        Raises:
            TransactionParsingFailed: If the body is missing, the expected
                sentence is not found, or the date text is not a valid date.
        """
        matches = self.EXTRACT_RE.search(_body_content(msg))
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        # NOTE(review): the amount is kept positive here while BMOParser
        # negates withdrawals -- confirm the intended sign convention.
        amount = Decimal(matches[1].replace(",", ""))
        date_raw = matches[2]
        payee = matches[3]

        try:
            # %b is locale-dependent; the alert uses English month names.
            txn_date = datetime.strptime(date_raw, "%b %d, %Y").date()
        except ValueError as err:
            raise TransactionParsingFailed(
                f"Unparseable transaction date: {date_raw!r}"
            ) from err

        return Transaction(
            account=self.account_id,
            date=txn_date,
            amount=amount,
            payee=payee,
            notes="via email",
            imported_id=msg["Message-ID"],
        )


class MBNAParser(TransactionParser):
    """Parses "MBNA - Transaction Alert" emails."""

    # Groups: 1 = amount (thousands separators allowed), 2 = payee,
    # 3 = time of day, 4 = AM/PM, 5 = ISO date (stated in UTC by the email).
    EXTRACT_RE = re.compile(
        r"A purchase of \$(\d[\d,]*\.\d{2}) from ([^<]+) was made at (\d{1,2}:\d{2} (AM|PM)) UTC on (\d{4}-\d{2}-\d{2})"  # noqa: E501
    )

    def __init__(self, account_id: UUID):
        """
        Args:
            account_id (UUID): Account every extracted transaction is
                assigned to.
        """
        self.account_id = account_id

    def match(self, msg: Message) -> bool:
        """Returns True when the From and Subject headers match exactly."""
        # NOTE(review): the expected From value ends in a space, which looks
        # like an address in angle brackets was lost -- confirm against a
        # real message before changing it.
        return (
            msg["From"] == "MBNA Notifications "
            and msg["Subject"] == "MBNA - Transaction Alert"
        )

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """
        Extracts amount, payee and date from the message body.

        The time of day captured by the pattern is not used; only the date
        is stored.

        Raises:
            TransactionParsingFailed: If the body is missing, the expected
                sentence is not found, or the date text is not a valid date.
        """
        matches = self.EXTRACT_RE.search(_body_content(msg))
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        # NOTE(review): the amount is kept positive here while BMOParser
        # negates withdrawals -- confirm the intended sign convention.
        amount = Decimal(matches[1].replace(",", ""))
        payee = matches[2]
        date_raw = matches[5]

        try:
            txn_date = date.fromisoformat(date_raw)
        except ValueError as err:
            raise TransactionParsingFailed(
                f"Unparseable transaction date: {date_raw!r}"
            ) from err

        return Transaction(
            account=self.account_id,
            date=txn_date,
            amount=amount,
            payee=payee,
            notes="via email",
            imported_id=msg["Message-ID"],
        )


class BMOParser(TransactionParser):
    """Parses withdrawal/deposit alert emails sent by bmoalerts@bmo.com."""

    # Groups: 1 = "withdrawal" or "deposit", 2 = amount (may contain
    # thousands separators), 3 = last three digits of the account number.
    # The pattern has no ^/$ anchors, so re.MULTILINE has no effect on it.
    EXTRACT_RE = re.compile(
        r"We want to let you know that a (withdrawal|deposit) of\s+\$([0-9,]+\.\d{2})\s+has been made (?:to|from) your account ending\s+in\s+(\d{3})",  # noqa: E501
        flags=re.MULTILINE,
    )

    def __init__(self, account_map: dict[int, UUID]):
        """
        Args:
            account_map (dict[int, UUID]): Maps the account-ending digits
                quoted in the email, converted to int, to the account the
                transaction is assigned to.
        """
        self._account_map = account_map

    def match(self, msg: Message) -> bool:
        """Returns True when the From header is exactly the BMO alert address."""
        return msg["From"] == "bmoalerts@bmo.com"

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """
        Extracts a signed amount and the account from the message body.

        The transaction date is taken from the email's Date header because
        the matched sentence carries no date. Withdrawals are returned as
        negative amounts. The payee is left empty.

        Returns:
            Optional[Transaction]: The transaction, or None when the account
            ending quoted in the email is not present in the account map.

        Raises:
            TransactionParsingFailed: If the body is missing, the expected
                sentence is not found, or the Date header is missing or
                cannot be parsed.
        """
        # The regex is applied to the tag-free text of the body.
        text = BeautifulSoup(_body_content(msg), "html.parser").get_text()
        matches = self.EXTRACT_RE.search(text)
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        amount = Decimal(matches[2].replace(",", ""))
        if matches[1] == "withdrawal":
            amount = amount * -1

        date_raw = msg["Date"]
        if date_raw is None:
            raise TransactionParsingFailed("No Date header found")
        try:
            # parsedate_to_datetime accepts the RFC 5322 variants (optional
            # weekday, trailing comment) that a fixed strptime format rejects.
            txn_date = parsedate_to_datetime(str(date_raw)).date()
        except (TypeError, ValueError) as err:
            raise TransactionParsingFailed(
                f"Unparseable Date header: {date_raw!r}"
            ) from err

        # int() drops leading zeros, matching the int keys of the account map.
        account_ref = int(matches[3])
        account_id = self._account_map.get(account_ref)
        if account_id is None:
            info("Account %s not in account map", account_ref)
            return None

        return Transaction(
            account=account_id,
            date=txn_date,
            amount=amount,
            payee="",
            notes="via email",
            imported_id=msg["Message-ID"],
        )