From 2662747ebe52dd0fa2923342ab448d4a520daedf Mon Sep 17 00:00:00 2001 From: Keenan Tims Date: Sat, 9 May 2026 00:00:38 -0700 Subject: [PATCH] fix bmo parser for new format / lints --- .pre-commit-config.yaml | 6 +++--- actual_imap_poll/model.py | 4 ++-- actual_imap_poll/parsers.py | 21 ++++++++++++++++----- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 58892ea..199bf6e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,14 @@ # .pre-commit-config.yaml repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.5 # Check for the latest version + rev: v0.15.12 # Check for the latest version hooks: - id: ruff args: ["check", "--select", "I", "--fix"] - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 # Check for the latest version + rev: v6.0.0 # Check for the latest version hooks: - id: check-yaml - id: end-of-file-fixer @@ -16,6 +16,6 @@ repos: - id: requirements-txt-fixer - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 # Check for the latest version + rev: 7.3.0 # Check for the latest version hooks: - id: flake8 diff --git a/actual_imap_poll/model.py b/actual_imap_poll/model.py index b07a0d0..f0996e8 100644 --- a/actual_imap_poll/model.py +++ b/actual_imap_poll/model.py @@ -1,5 +1,5 @@ +import datetime from dataclasses import dataclass -from datetime import date from decimal import Decimal from uuid import UUID @@ -12,7 +12,7 @@ class Transaction: """ account: UUID - date: date + date: datetime.date amount: Decimal # Note: decimal dollars, JS shim will convert to cents as described in the API payee: str # imported_payee in API notes: str diff --git a/actual_imap_poll/parsers.py b/actual_imap_poll/parsers.py index 2df6db8..8c4d293 100644 --- a/actual_imap_poll/parsers.py +++ b/actual_imap_poll/parsers.py @@ -58,6 +58,18 @@ class TransactionParser(ABC): raise TransactionParsingFailed("No content of message found") return content + @staticmethod + def strip_html(msg: EmailMessage) -> str: + body = msg.get_body(preferencelist=("html", "plain")) + if body is None: + raise TransactionParsingFailed("No HTML body of message found") + content = body.get_content() + if content is None: + raise TransactionParsingFailed("No content of message found") + + soup = BeautifulSoup(content, "html.parser") + return soup.get_text() + class TransactionParsingFailed(Exception): pass @@ -136,8 +148,8 @@ class MBNAParser(TransactionParser): class BMOParser(TransactionParser): EXTRACT_RE = re.compile( - r"We want to let you know that a (withdrawal|deposit) of\s+\$([0-9,]+\.\d{2})\s+has been made (?:to|from) your account ending\s+in\s+(\d{3})", # noqa: E501 - flags=re.MULTILINE, + r"There was a (withdrawal|deposit).*Amount:\s*\$([0-9,]+\.\d{2}).*Account:\s*Ending in ([0-9]+)", # noqa: E501 + flags=re.DOTALL, ) def __init__(self, account_map: dict[int, UUID]): @@ -147,9 +159,8 @@ class BMOParser(TransactionParser): return msg["From"] == "bmoalerts@bmo.com" def extract(self, msg: EmailMessage) -> Optional[Transaction]: - content = self.get_content(msg) - soup = BeautifulSoup(content, "html.parser") - matches = self.EXTRACT_RE.search(soup.get_text()) + content = self.strip_html(msg) + matches = self.EXTRACT_RE.search(content) if matches is None: raise TransactionParsingFailed("No matches for extraction RE")