Fix BMO parser for the new alert email format; fix lint issues

2026-05-09 00:00:38 -07:00
parent 93778beb31
commit 2662747ebe
3 changed files with 21 additions and 10 deletions
@@ -58,6 +58,18 @@ class TransactionParser(ABC):
            raise TransactionParsingFailed("No content of message found")
        return content

+    @staticmethod
+    def strip_html(msg: EmailMessage) -> str:
+        body = msg.get_body(preferencelist=("html", "plain"))
+        if body is None:
+            raise TransactionParsingFailed("No HTML body of message found")
+        content = body.get_content()
+        if content is None:
+            raise TransactionParsingFailed("No content of message found")
+
+        soup = BeautifulSoup(content, "html.parser")
+        return soup.get_text()
+

 class TransactionParsingFailed(Exception):
    pass
@@ -136,8 +148,8 @@ class MBNAParser(TransactionParser):

 class BMOParser(TransactionParser):
    EXTRACT_RE = re.compile(
-        r"We want to let you know that a (withdrawal|deposit) of\s+\$([0-9,]+\.\d{2})\s+has been made (?:to|from) your account ending\s+in\s+(\d{3})",  # noqa: E501
-        flags=re.MULTILINE,
+        r"There was a (withdrawal|deposit).*Amount:\s*\$([0-9,]+\.\d{2}).*Account:\s*Ending in ([0-9]+)",  # noqa: E501
+        flags=re.DOTALL,
    )

    def __init__(self, account_map: dict[int, UUID]):
@@ -147,9 +159,8 @@ class BMOParser(TransactionParser):
        return msg["From"] == "bmoalerts@bmo.com"

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
-        content = self.get_content(msg)
-        soup = BeautifulSoup(content, "html.parser")
-        matches = self.EXTRACT_RE.search(soup.get_text())
+        content = self.strip_html(msg)
+        matches = self.EXTRACT_RE.search(content)
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")