From c654bb116f9e140d786ba8c5993b7071fe887dc7 Mon Sep 17 00:00:00 2001 From: Keenan Tims Date: Sun, 23 Nov 2025 19:06:27 -0800 Subject: [PATCH] Add ScotiaBank parser --- parsers.py | 100 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 28 deletions(-) diff --git a/parsers.py b/parsers.py index de2c4b7..a77fcb3 100644 --- a/parsers.py +++ b/parsers.py @@ -3,8 +3,8 @@ from abc import ABC, abstractmethod from datetime import date, datetime from decimal import Decimal from email.message import EmailMessage, Message -from logging import info -from typing import Optional +from logging import info, warning +from typing import Any, Optional, Sequence from uuid import UUID from bs4 import BeautifulSoup @@ -46,6 +46,18 @@ class TransactionParser(ABC): """ pass + @staticmethod + def get_content( + msg: EmailMessage, preferencelist: Sequence[str] = ("html", "plain") + ) -> Any: + body = msg.get_body(preferencelist=preferencelist) + if body is None: + raise TransactionParsingFailed("No body of message found") + content = body.get_content() + if content is None: + raise TransactionParsingFailed("No content of message found") + return content + class TransactionParsingFailed(Exception): pass @@ -66,12 +78,7 @@ class RogersBankParser(TransactionParser): ) def extract(self, msg: EmailMessage) -> Optional[Transaction]: - body = msg.get_body() - if body is None: - raise TransactionParsingFailed("No body of message found") - content = body.get_content() - if content is None: - raise TransactionParsingFailed("No content of message found") + content = self.get_content(msg) matches = self.EXTRACT_RE.search(content) if matches is None: raise TransactionParsingFailed("No matches for extraction RE") @@ -110,12 +117,7 @@ class MBNAParser(TransactionParser): ) def extract(self, msg: EmailMessage) -> Optional[Transaction]: - body = msg.get_body(preferencelist=("html", "plain")) - if body is None: - raise TransactionParsingFailed("No body of message found") - content = body.get_content() - if content is None: - raise TransactionParsingFailed("No content of message found") + content = self.get_content(msg) matches = self.EXTRACT_RE.search(content) if matches is None: raise TransactionParsingFailed("No matches for extraction RE") @@ -145,12 +147,7 @@ class BMOParser(TransactionParser): return msg["From"] == "bmoalerts@bmo.com" def extract(self, msg: EmailMessage) -> Optional[Transaction]: - body = msg.get_body(preferencelist=("html", "plain")) - if body is None: - raise TransactionParsingFailed("No body of message found") - content = body.get_content() - if content is None: - raise TransactionParsingFailed("No content of message found") + content = self.get_content(msg) soup = BeautifulSoup(content, "html.parser") matches = self.EXTRACT_RE.search(soup.get_text()) if matches is None: @@ -162,7 +159,7 @@ class BMOParser(TransactionParser): date = parse_email_time(msg["Date"]).date() account_ref = int(matches[3]) if account_ref not in self._account_map: - info("Account %s not in account map", account_ref) + warning("Account %s not in account map, skipping transaction", account_ref) return None account_id = self._account_map[account_ref] return Transaction( @@ -188,12 +185,7 @@ class CIBCParser(TransactionParser): return msg["From"] == "CIBC Banking " def extract_payment(self, msg: EmailMessage): - body = msg.get_body(preferencelist=("html", "plain")) - if body is None: - raise TransactionParsingFailed("No body of message found") - content = body.get_content() - if content is None: - raise TransactionParsingFailed("No content of message found") + content = self.get_content(msg) matches = self.PAYMENT_EXTRACT_RE.search(content) if matches is None: @@ -204,7 +196,7 @@ class CIBCParser(TransactionParser): date = parse_email_time(msg["Date"]).date() if account_ref not in self._account_map: - info("Account %s not in account map", account_ref) + warning("Account %s not in account map, skipping transaction", account_ref) return None account_id = self._account_map[account_ref] @@ -222,3 +214,55 @@ class CIBCParser(TransactionParser): case "New payment to your credit card": return self.extract_payment(msg) return None + + +class ScotiaBankParser(TransactionParser): + PAYMENT_EXTRACT_RE = re.compile( + r"There was an authorization (?Pwithout the credit card present )?for \$(?P[0-9]+\.[0-9]{2}) at (?P.+) on account (?P[0-9*]+) at\s+(?P