From d0f69611dc2d9f24e03a9c2769ed29de92e4c91e Mon Sep 17 00:00:00 2001
From: Keenan Tims
Date: Thu, 24 Apr 2025 18:47:45 -0700
Subject: [PATCH] fix content extraction for multipart mbna

---
 README.md  |  2 +-
 parsers.py | 21 +++++++++++++++------
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index f3216d2..fc5daf5 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ class MinimalBankParser(TransactionParser):
         return msg["From"] == "Minimal Bank " and "Transaction Alert" in msg["Subject"]
 
     def extract(self, msg):
-        body = msg.get_content()
+        body = msg.get_body().get_content()
         amount = ... # Extract amount from body
         date = ... # Extract date from body
         payee = ... # Extract payee from body
diff --git a/parsers.py b/parsers.py
index 9828f2c..56a4e10 100644
--- a/parsers.py
+++ b/parsers.py
@@ -62,10 +62,13 @@ class RogersBankParser(TransactionParser):
     )
 
     def extract(self, msg: EmailMessage) -> Optional[Transaction]:
-        body = msg.get_content()
+        body = msg.get_body()
         if body is None:
             raise TransactionParsingFailed("No body of message found")
-        matches = self.EXTRACT_RE.search(body)
+        content = body.get_content()
+        if content is None:
+            raise TransactionParsingFailed("No content of message found")
+        matches = self.EXTRACT_RE.search(content)
         if matches is None:
             raise TransactionParsingFailed("No matches for extraction RE")
         amount = Decimal(matches[1])
@@ -97,10 +100,13 @@ class MBNAParser(TransactionParser):
     )
 
     def extract(self, msg: EmailMessage) -> Optional[Transaction]:
-        body = msg.get_content()
+        body = msg.get_body()
         if body is None:
             raise TransactionParsingFailed("No body of message found")
-        matches = self.EXTRACT_RE.search(body)
+        content = body.get_content()
+        if content is None:
+            raise TransactionParsingFailed("No content of message found")
+        matches = self.EXTRACT_RE.search(content)
         if matches is None:
             raise TransactionParsingFailed("No matches for extraction RE")
         amount = Decimal(matches[1])
@@ -129,10 +135,13 @@ class BMOParser(TransactionParser):
         return msg["From"] == "bmoalerts@bmo.com"
 
     def extract(self, msg: EmailMessage) -> Optional[Transaction]:
-        body = msg.get_content()
+        body = msg.get_body(preferencelist=("html", "plain"))
         if body is None:
             raise TransactionParsingFailed("No body of message found")
-        soup = BeautifulSoup(body, "html.parser")
+        content = body.get_content()
+        if content is None:
+            raise TransactionParsingFailed("No content of message found")
+        soup = BeautifulSoup(content, "html.parser")
         matches = self.EXTRACT_RE.search(soup.get_text())
         if matches is None:
             raise TransactionParsingFailed("No matches for extraction RE")