Files
actual-imap-poll/actual_imap_poll/parsers.py
2025-11-23 22:16:35 -08:00

301 lines
10 KiB
Python

import re
from abc import ABC, abstractmethod
from datetime import date, datetime
from decimal import Decimal
from email.message import EmailMessage, Message
from logging import info, warning
from typing import Any, Optional, Sequence
from uuid import UUID
from bs4 import BeautifulSoup
from .model import Transaction
def parse_email_time(s: str) -> datetime:
    """Parse the value of an email ``Date`` header into a ``datetime``.

    The previous implementation used a single fixed ``strptime`` format, which
    rejected valid headers such as ``"24 Nov 2025 06:16:35 +0000"`` (no day
    name) or ``"Mon, 24 Nov 2025 06:16:35 +0000 (UTC)"`` (trailing comment).
    The standard-library RFC 5322 date parser accepts those variants as well
    as the original format.

    Args:
        s (str): Raw ``Date`` header value.

    Returns:
        datetime: The parsed timestamp. It is timezone-aware except when the
        header carries the ``-0000`` offset, for which the standard library
        returns a naive datetime.

    Raises:
        ValueError: If ``s`` is not a parseable date string.
    """
    # Imported locally so the function does not rely on a module-level import
    # that the file does not currently have.
    from email.utils import parsedate_to_datetime

    return parsedate_to_datetime(s)
class TransactionParser(ABC):
    """Abstract interface for turning notification emails into transactions."""

    @abstractmethod
    def match(self, msg: Message) -> bool:
        """
        Report whether this parser is responsible for the given email.

        Args:
            msg (Message): The email message to evaluate.

        Returns:
            bool: True when the parser wants to handle the message.
        """
        ...

    @abstractmethod
    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """
        Build a transaction from the given email message.

        Args:
            msg (EmailMessage): The email message to parse.

        Returns:
            Optional[Transaction]: The extracted transaction, or None when the
            parser decides to skip the message.

        Raises:
            TransactionParsingFailed: If the message cannot be parsed.
        """
        ...

    @staticmethod
    def get_content(
        msg: EmailMessage, preferencelist: Sequence[str] = ("html", "plain")
    ) -> Any:
        """
        Return the content of the preferred body part of ``msg``.

        Args:
            msg (EmailMessage): The email message to read.
            preferencelist (Sequence[str]): Body kinds to look for, in order
                of preference; forwarded to ``EmailMessage.get_body``.

        Returns:
            Any: Whatever ``get_content()`` yields for the selected part.

        Raises:
            TransactionParsingFailed: If no body part is found or the part
                has no content.
        """
        part = msg.get_body(preferencelist=preferencelist)
        if part is not None:
            payload = part.get_content()
            if payload is not None:
                return payload
            raise TransactionParsingFailed("No content of message found")
        raise TransactionParsingFailed("No body of message found")
class TransactionParsingFailed(Exception):
    """Raised when an email cannot be turned into a transaction."""
class RogersBankParser(TransactionParser):
    """Parser for Rogers Bank "Purchase amount alert" emails."""

    # Groups: 1 = amount, 2 = date (e.g. "Nov 23, 2025"), 3 = payee,
    # 4 = location.
    # The month abbreviation uses [A-Za-z]; the earlier [A-z] range also
    # accepted the punctuation that sits between "Z" and "a" ([ \ ] ^ _ `).
    # NOTE(review): the final "." is an unescaped wildcard; presumably it is
    # meant to consume the sentence-ending period — confirm against a real
    # alert before tightening it.
    EXTRACT_RE = re.compile(
        r"Attempt of \$([0-9,]+\.\d{2}) was made on ([A-Za-z]{3} \d{1,2}, \d{4})[^<]*at ([^<]+) in ([^<]+)."  # noqa: E501
    )

    def __init__(self, account_id: UUID):
        """
        Args:
            account_id (UUID): Account every extracted transaction is booked to.
        """
        self.account_id = account_id

    def match(self, msg: Message) -> bool:
        """Return True for messages with the exact Rogers Bank sender and subject."""
        return (
            msg["From"] == "Rogers Bank <onlineservices@RogersBank.com>"
            and msg["Subject"] == "Purchase amount alert"
        )

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """
        Extract the purchase (or cash-back rebate) described in the alert.

        Args:
            msg (EmailMessage): The alert email.

        Returns:
            Optional[Transaction]: The transaction; the amount is negative for
            purchases and positive for cash-back rebates.

        Raises:
            TransactionParsingFailed: If the body does not match the expected
                wording or the captured date cannot be parsed.
        """
        content = self.get_content(msg)
        matches = self.EXTRACT_RE.search(content)
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        # Purchases are recorded as negative amounts.
        amount = Decimal(matches[1].replace(",", "")) * -1
        date_raw = matches[2]
        payee = matches[3]
        location = matches[4]

        # A "CashBack" payee located "in Rebate" is money coming in, so the
        # sign is flipped back to positive.
        if location == "Rebate" and "CashBack" in payee:
            amount = amount * -1

        try:
            txn_date = datetime.strptime(date_raw, "%b %d, %Y").date()
        except ValueError as err:
            # Three letters that are not a month name (or an impossible day)
            # pass the regex but not strptime; report it as a parsing failure
            # like every other unparseable message.
            raise TransactionParsingFailed(
                f"Unparseable transaction date: {date_raw!r}"
            ) from err

        return Transaction(
            account=self.account_id,
            date=txn_date,
            amount=amount,
            payee=payee,
            notes=f"in {location} (via email)",
            imported_id=msg["Message-ID"],
        )
class MBNAParser(TransactionParser):
    """Parser for "MBNA - Transaction Alert" emails."""

    # Groups: 1 = amount, 2 = payee, 3 = time of day, 4 = AM/PM, 5 = ISO date.
    EXTRACT_RE = re.compile(
        r"A purchase of \$([0-9,]+\.\d{2}) from ([^<]+) was made at (\d{1,2}:\d{2} (AM|PM)) UTC on (\d{4}-\d{2}-\d{2})"  # noqa: E501
    )

    def __init__(self, account_id: UUID):
        """
        Args:
            account_id (UUID): Account every extracted transaction is booked to.
        """
        self.account_id = account_id

    def match(self, msg: Message) -> bool:
        """Return True only for the exact MBNA sender and subject."""
        if msg["From"] != "MBNA Notifications <noreply@mbna.ca>":
            return False
        return msg["Subject"] == "MBNA - Transaction Alert"

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """
        Extract the purchase described in the alert.

        Args:
            msg (EmailMessage): The alert email.

        Returns:
            Optional[Transaction]: The purchase, recorded as a negative amount
            and dated with the ISO date captured from the body.

        Raises:
            TransactionParsingFailed: If the body does not match the expected
                wording.
        """
        found = self.EXTRACT_RE.search(self.get_content(msg))
        if found is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        # The time-of-day groups are captured by the pattern but not used.
        raw_amount, merchant, _clock, _meridiem, iso_day = found.groups()
        spent = Decimal(raw_amount.replace(",", "")) * -1
        purchased_on = date.fromisoformat(iso_day)

        return Transaction(
            account=self.account_id,
            date=purchased_on,
            amount=spent,
            payee=merchant,
            notes="via email",
            imported_id=msg["Message-ID"],
        )
class BMOParser(TransactionParser):
    """Parser for BMO withdrawal/deposit alert emails."""

    # Groups: 1 = "withdrawal" or "deposit", 2 = amount, 3 = the three
    # account digits quoted in the alert.
    EXTRACT_RE = re.compile(
        r"We want to let you know that a (withdrawal|deposit) of\s+\$([0-9,]+\.\d{2})\s+has been made (?:to|from) your account ending\s+in\s+(\d{3})",  # noqa: E501
        flags=re.MULTILINE,
    )

    def __init__(self, account_map: dict[int, UUID]):
        """
        Args:
            account_map (dict[int, UUID]): Maps the account digits from the
                alert (converted to int) to the target account id.
        """
        self._account_map = account_map

    def match(self, msg: Message) -> bool:
        """Return True for messages sent from the BMO alerts address."""
        return msg["From"] == "bmoalerts@bmo.com"

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """
        Extract the withdrawal or deposit described in the alert.

        The pattern is applied to the text of the HTML body (tags removed),
        and the transaction is dated from the email's ``Date`` header.

        Args:
            msg (EmailMessage): The alert email.

        Returns:
            Optional[Transaction]: The transaction (negative for withdrawals),
            or None when the account is not present in the account map.

        Raises:
            TransactionParsingFailed: If the body does not match the expected
                wording.
        """
        text = BeautifulSoup(self.get_content(msg), "html.parser").get_text()
        found = self.EXTRACT_RE.search(text)
        if found is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        direction, raw_amount, account_digits = found.groups()
        magnitude = Decimal(raw_amount.replace(",", ""))
        signed = magnitude * -1 if direction == "withdrawal" else magnitude

        received_on = parse_email_time(msg["Date"]).date()
        account_ref = int(account_digits)
        if account_ref in self._account_map:
            return Transaction(
                account=self._account_map[account_ref],
                date=received_on,
                amount=signed,
                payee="",
                notes="via email",
                imported_id=msg["Message-ID"],
            )

        warning("Account %s not in account map, skipping transaction", account_ref)
        return None
class CIBCParser(TransactionParser):
PAYMENT_EXTRACT_RE = re.compile(
r"recently received a \$([0-9,]+\.\d{2}) payment to your [^<]+ ending in (\d{4})",
flags=re.MULTILINE,
)
PURCHASE_EXTRACT_RE = re.compile(
r"made a purchase with your CIBC.*ending in (?P<account>\d{4}) for \$(?P<amount>\d+\.\d{2}) at (?P<payee>.*?).<br", # noqa: E501
re.MULTILINE,
)
def __init__(self, account_map: dict[int, UUID]):
self._account_map = account_map
def match(self, msg: Message) -> bool:
return msg["From"] == "CIBC Banking <Mailbox.noreply@cibc.com>"
def extract_payment(self, msg: EmailMessage):
content = self.get_content(msg)
matches = self.PAYMENT_EXTRACT_RE.search(content)
if matches is None:
raise TransactionParsingFailed("no matches for extraction RE")
amount = Decimal(matches[1].replace(",", ""))
account_ref = int(matches[2])
date = parse_email_time(msg["Date"]).date()
if account_ref not in self._account_map:
warning("Account %s not in account map, skipping transaction", account_ref)
return None
account_id = self._account_map[account_ref]
return Transaction(
account=account_id,
date=date,
amount=amount,
payee="",
notes="via email",
imported_id=msg["Message-ID"],
)
def extract_purchase(self, msg: EmailMessage) -> Optional[Transaction]:
content = self.get_content(msg)
matches = self.PURCHASE_EXTRACT_RE.search(content)
if matches is None:
raise TransactionParsingFailed("no matches for extraction RE")
amount = Decimal(matches["amount"].replace(",", "")) * -1
date = parse_email_time(msg["Date"]).date()
account_ref = int(matches["account"])
if account_ref not in self._account_map:
warning("Account %s not in account map, skipping transaction", account_ref)
return None
account_id = self._account_map[account_ref]
return Transaction(
account=account_id,
date=date,
amount=amount,
payee=matches["payee"],
notes="via email",
imported_id=msg["Message-ID"],
)
def extract(self, msg: EmailMessage) -> Optional[Transaction]:
match msg["Subject"]:
case "New payment to your credit card":
return self.extract_payment(msg)
case "New purchase on your credit card":
return self.extract_purchase(msg)
return None
class ScotiaBankParser(TransactionParser):
PAYMENT_EXTRACT_RE = re.compile(
r"There was an authorization (?P<card>without the credit card present )?for \$(?P<amount>[0-9]+\.[0-9]{2}) at (?P<payee>.+) on account (?P<account>[0-9*]+) at\s+(?P<time>[0-9]{1,2}:[0-9]{2} (am|pm))", # noqa: E501
re.MULTILINE,
)
def __init__(self, account_map: dict[str, UUID]):
self._account_map = account_map
def match(self, msg: Message) -> bool:
return msg["From"] == "Scotia InfoAlerts <infoalerts@scotiabank.com>"
def extract_transaction(self, msg: EmailMessage) -> Optional[Transaction]:
content = self.get_content(msg)
matches = self.PAYMENT_EXTRACT_RE.search(content)
if matches is None:
raise TransactionParsingFailed("no matches for extraction RE")
amount = Decimal(matches["amount"].replace(",", "")) * -1
date = parse_email_time(msg["Date"]).date()
if matches["account"] not in self._account_map:
warning(
"Account %s not in account map, skipping transaction",
matches["account"],
)
return None
account_id = self._account_map[matches["account"]]
return Transaction(
account=account_id,
date=date,
amount=amount,
payee=matches["payee"],
notes="without card (via email)"
if matches.group("card") is not None
else "with card (via email)",
imported_id=msg["Message-ID"],
)
def extract(self, msg: EmailMessage) -> Optional[Transaction]:
match msg["Subject"]:
case (
"Authorization on your credit account"
| "Authorization without credit card present"
):
return self.extract_transaction(msg)
info("Subject `%s` didn't match any extractors", msg["Subject"])
return None