Add ScotiaBank parser

This commit is contained in:
2025-11-23 19:06:27 -08:00
parent 1bf94d1d94
commit c654bb116f

View File

@@ -3,8 +3,8 @@ from abc import ABC, abstractmethod
from datetime import date, datetime from datetime import date, datetime
from decimal import Decimal from decimal import Decimal
from email.message import EmailMessage, Message from email.message import EmailMessage, Message
from logging import info from logging import info, warning
from typing import Optional from typing import Any, Optional, Sequence
from uuid import UUID from uuid import UUID
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@@ -46,6 +46,18 @@ class TransactionParser(ABC):
""" """
pass pass
@staticmethod
def get_content(
msg: EmailMessage, preferencelist: Sequence[str] = ("html", "plain")
) -> Any:
body = msg.get_body(preferencelist=preferencelist)
if body is None:
raise TransactionParsingFailed("No body of message found")
content = body.get_content()
if content is None:
raise TransactionParsingFailed("No content of message found")
return content
class TransactionParsingFailed(Exception): class TransactionParsingFailed(Exception):
pass pass
@@ -66,12 +78,7 @@ class RogersBankParser(TransactionParser):
) )
def extract(self, msg: EmailMessage) -> Optional[Transaction]: def extract(self, msg: EmailMessage) -> Optional[Transaction]:
body = msg.get_body() content = self.get_content(msg)
if body is None:
raise TransactionParsingFailed("No body of message found")
content = body.get_content()
if content is None:
raise TransactionParsingFailed("No content of message found")
matches = self.EXTRACT_RE.search(content) matches = self.EXTRACT_RE.search(content)
if matches is None: if matches is None:
raise TransactionParsingFailed("No matches for extraction RE") raise TransactionParsingFailed("No matches for extraction RE")
@@ -110,12 +117,7 @@ class MBNAParser(TransactionParser):
) )
def extract(self, msg: EmailMessage) -> Optional[Transaction]: def extract(self, msg: EmailMessage) -> Optional[Transaction]:
body = msg.get_body(preferencelist=("html", "plain")) content = self.get_content(msg)
if body is None:
raise TransactionParsingFailed("No body of message found")
content = body.get_content()
if content is None:
raise TransactionParsingFailed("No content of message found")
matches = self.EXTRACT_RE.search(content) matches = self.EXTRACT_RE.search(content)
if matches is None: if matches is None:
raise TransactionParsingFailed("No matches for extraction RE") raise TransactionParsingFailed("No matches for extraction RE")
@@ -145,12 +147,7 @@ class BMOParser(TransactionParser):
return msg["From"] == "bmoalerts@bmo.com" return msg["From"] == "bmoalerts@bmo.com"
def extract(self, msg: EmailMessage) -> Optional[Transaction]: def extract(self, msg: EmailMessage) -> Optional[Transaction]:
body = msg.get_body(preferencelist=("html", "plain")) content = self.get_content(msg)
if body is None:
raise TransactionParsingFailed("No body of message found")
content = body.get_content()
if content is None:
raise TransactionParsingFailed("No content of message found")
soup = BeautifulSoup(content, "html.parser") soup = BeautifulSoup(content, "html.parser")
matches = self.EXTRACT_RE.search(soup.get_text()) matches = self.EXTRACT_RE.search(soup.get_text())
if matches is None: if matches is None:
@@ -162,7 +159,7 @@ class BMOParser(TransactionParser):
date = parse_email_time(msg["Date"]).date() date = parse_email_time(msg["Date"]).date()
account_ref = int(matches[3]) account_ref = int(matches[3])
if account_ref not in self._account_map: if account_ref not in self._account_map:
info("Account %s not in account map", account_ref) warning("Account %s not in account map, skipping transaction", account_ref)
return None return None
account_id = self._account_map[account_ref] account_id = self._account_map[account_ref]
return Transaction( return Transaction(
@@ -188,12 +185,7 @@ class CIBCParser(TransactionParser):
return msg["From"] == "CIBC Banking <mailbox.noreply@cibc.com>" return msg["From"] == "CIBC Banking <mailbox.noreply@cibc.com>"
def extract_payment(self, msg: EmailMessage): def extract_payment(self, msg: EmailMessage):
body = msg.get_body(preferencelist=("html", "plain")) content = self.get_content(msg)
if body is None:
raise TransactionParsingFailed("No body of message found")
content = body.get_content()
if content is None:
raise TransactionParsingFailed("No content of message found")
matches = self.PAYMENT_EXTRACT_RE.search(content) matches = self.PAYMENT_EXTRACT_RE.search(content)
if matches is None: if matches is None:
@@ -204,7 +196,7 @@ class CIBCParser(TransactionParser):
date = parse_email_time(msg["Date"]).date() date = parse_email_time(msg["Date"]).date()
if account_ref not in self._account_map: if account_ref not in self._account_map:
info("Account %s not in account map", account_ref) warning("Account %s not in account map, skipping transaction", account_ref)
return None return None
account_id = self._account_map[account_ref] account_id = self._account_map[account_ref]
@@ -222,3 +214,55 @@ class CIBCParser(TransactionParser):
case "New payment to your credit card": case "New payment to your credit card":
return self.extract_payment(msg) return self.extract_payment(msg)
return None return None
class ScotiaBankParser(TransactionParser):
PAYMENT_EXTRACT_RE = re.compile(
r"There was an authorization (?P<card>without the credit card present )?for \$(?P<amount>[0-9]+\.[0-9]{2}) at (?P<payee>.+) on account (?P<account>[0-9*]+) at\s+(?P<time>[0-9]{1,2}:[0-9]{2} (am|pm))", # noqa: E501
re.MULTILINE,
)
def __init__(self, account_map: dict[str, UUID]):
self._account_map = account_map
def match(self, msg: Message) -> bool:
return msg["From"] == "Scotia InfoAlerts <infoalerts@scotiabank.com>"
def extract_transaction(self, msg: EmailMessage) -> Optional[Transaction]:
content = self.get_content(msg)
matches = self.PAYMENT_EXTRACT_RE.search(content)
if matches is None:
raise TransactionParsingFailed("no matches for extraction RE")
amount = Decimal(matches["amount"].replace(",", ""))
date = parse_email_time(msg["Date"]).date()
if matches["account"] not in self._account_map:
warning(
"Account %s not in account map, skipping transaction",
matches["account"],
)
return None
account_id = self._account_map[matches["account"]]
return Transaction(
account=account_id,
date=date,
amount=amount,
payee=matches["payee"],
notes="without card (via email)"
if matches.group("card") is not None
else "with card (via email)",
imported_id=msg["Message-ID"],
)
def extract(self, msg: EmailMessage) -> Optional[Transaction]:
match msg["Subject"]:
case (
"Authorization on your credit account"
| "Authorization without credit card present"
):
return self.extract_transaction(msg)
info("Subject `%s` didn't match any extractors", msg["Subject"])
return None