actual-imap-poll/parsers.py

225 lines
7.4 KiB
Python

import re
from abc import ABC, abstractmethod
from datetime import date, datetime
from decimal import Decimal
from email.message import EmailMessage, Message
from email.utils import parsedate_to_datetime
from logging import info
from typing import Optional
from uuid import UUID

from bs4 import BeautifulSoup

from model import Transaction
def parse_email_time(s: str) -> datetime:
    """Parse the value of an email ``Date`` header into a datetime.

    The strict ``"%a, %d %b %Y %H:%M:%S %z"`` layout is tried first, so
    headers in that exact form parse the same way they always have. Headers
    that do not fit it (a trailing comment such as ``(UTC)``, no day-of-week,
    or a named zone such as ``GMT``) are handed to
    ``email.utils.parsedate_to_datetime``.

    Args:
        s: Raw ``Date`` header value.

    Returns:
        The parsed datetime. Values accepted by the strict layout are always
        timezone-aware; the fallback may return a naive datetime when the
        header carries a ``-0000`` offset.

    Raises:
        ValueError: If neither parser can interpret ``s``.
    """
    try:
        return datetime.strptime(s, "%a, %d %b %Y %H:%M:%S %z")
    except ValueError:
        # Not the canonical layout; let the email package's more lenient
        # date parser have a go before giving up.
        return parsedate_to_datetime(s)
class TransactionParser(ABC):
    """Abstract interface implemented by every bank-specific email parser."""

    @abstractmethod
    def match(self, msg: Message) -> bool:
        """Report whether this parser is responsible for ``msg``.

        Args:
            msg (Message): The email message to evaluate.

        Returns:
            bool: True when the message meets this parser's criteria,
            False otherwise.
        """

    @abstractmethod
    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """Build a Transaction from the contents of ``msg``.

        Args:
            msg (EmailMessage): The email message to parse.

        Returns:
            Optional[Transaction]: The extracted transaction. The return
            annotation also permits None for messages an implementation
            chooses not to turn into a transaction.

        Raises:
            TransactionParsingFailed: If the message cannot be parsed
                successfully.
        """
class TransactionParsingFailed(Exception):
    """Raised when an email cannot be turned into a transaction."""
class RogersBankParser(TransactionParser):
    """Parser for Rogers Bank "Purchase amount alert" emails."""

    # Groups: 1 = amount, 2 = date ("Mon D, YYYY"), 3 = payee, 4 = location.
    # The month class is [A-Za-z]; the former [A-z] range also admitted the
    # punctuation that sits between "Z" and "a" in ASCII.
    # NOTE(review): the trailing "." is unescaped, so it consumes any single
    # character after the location rather than a literal full stop - confirm
    # against a real alert before tightening it.
    EXTRACT_RE = re.compile(
        r"Attempt of \$([0-9,]+\.\d{2}) was made on ([A-Za-z]{3} \d{1,2}, \d{4})[^<]*at ([^<]+) in ([^<]+)."  # noqa: E501
    )

    def __init__(self, account_id: UUID):
        """Store the account every extracted transaction is booked against.

        Args:
            account_id: Identifier passed as ``account`` to each Transaction.
        """
        self.account_id = account_id

    def match(self, msg: Message) -> bool:
        """Return True when both the sender and the subject are an exact match.

        Args:
            msg: The email message to evaluate.

        Returns:
            True only for Rogers Bank purchase-amount alerts.
        """
        return (
            msg["From"] == "Rogers Bank <onlineservices@RogersBank.com>"
            and msg["Subject"] == "Purchase amount alert"
        )

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """Extract the purchase described in the alert body.

        Args:
            msg: The email message to parse.

        Returns:
            A Transaction with a negative amount for a purchase, or a
            positive amount when the alert describes a CashBack rebate.

        Raises:
            TransactionParsingFailed: If the message has no body or content,
                the body does not match ``EXTRACT_RE``, or the matched date
                is not a real calendar date.
        """
        body = msg.get_body()
        if body is None:
            raise TransactionParsingFailed("No body of message found")
        content = body.get_content()
        if content is None:
            raise TransactionParsingFailed("No content of message found")
        found = self.EXTRACT_RE.search(content)
        if found is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        amount_raw, date_raw, payee, location = found.groups()

        # The regex only checks the shape of the date; strptime rejects
        # unknown month abbreviations and impossible days. Surface that as a
        # parsing failure instead of leaking a ValueError to the caller.
        try:
            txn_date = datetime.strptime(date_raw, "%b %d, %Y").date()
        except ValueError as err:
            raise TransactionParsingFailed(
                f"Unparseable transaction date: {date_raw!r}"
            ) from err

        # Purchases are recorded as outflows, hence the sign flip.
        amount = Decimal(amount_raw.replace(",", "")) * -1
        if location == "Rebate" and "CashBack" in payee:
            # Cash-back rebates arrive through the same alert but are inflows.
            amount = amount * -1

        return Transaction(
            account=self.account_id,
            date=txn_date,
            amount=amount,
            payee=payee,
            notes=f"in {location} (via email)",
            imported_id=msg["Message-ID"],
        )
class MBNAParser(TransactionParser):
    """Parser for MBNA "MBNA - Transaction Alert" emails."""

    # Groups: 1 = amount, 2 = payee, 3 = time with AM/PM, 4 = AM/PM,
    # 5 = ISO date. The alert text labels the time as UTC; only the date
    # (group 5) is used below.
    EXTRACT_RE = re.compile(
        r"A purchase of \$([0-9,]+\.\d{2}) from ([^<]+) was made at (\d{1,2}:\d{2} (AM|PM)) UTC on (\d{4}-\d{2}-\d{2})"  # noqa: E501
    )

    def __init__(self, account_id: UUID):
        """Store the account every extracted transaction is booked against.

        Args:
            account_id: Identifier passed as ``account`` to each Transaction.
        """
        self.account_id = account_id

    def match(self, msg: Message) -> bool:
        """Return True when both the sender and the subject are an exact match.

        Args:
            msg: The email message to evaluate.

        Returns:
            True only for MBNA transaction alerts.
        """
        return (
            msg["From"] == "MBNA Notifications <noreply@mbna.ca>"
            and msg["Subject"] == "MBNA - Transaction Alert"
        )

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """Extract the purchase described in the alert body.

        The HTML part is preferred over the plain-text part.

        Args:
            msg: The email message to parse.

        Returns:
            A Transaction with a negative amount (purchases are outflows).

        Raises:
            TransactionParsingFailed: If the message has no body or content,
                the body does not match ``EXTRACT_RE``, or the matched date
                is not a real calendar date.
        """
        body = msg.get_body(preferencelist=("html", "plain"))
        if body is None:
            raise TransactionParsingFailed("No body of message found")
        content = body.get_content()
        if content is None:
            raise TransactionParsingFailed("No content of message found")
        found = self.EXTRACT_RE.search(content)
        if found is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        payee = found[2]
        date_raw = found[5]

        # The regex accepts any NNNN-NN-NN string (e.g. 2024-02-30);
        # fromisoformat is what actually validates it. Report a bad date as
        # a parsing failure rather than leaking a ValueError.
        try:
            txn_date = date.fromisoformat(date_raw)
        except ValueError as err:
            raise TransactionParsingFailed(
                f"Unparseable transaction date: {date_raw!r}"
            ) from err

        # Purchases are recorded as outflows, hence the sign flip.
        amount = Decimal(found[1].replace(",", "")) * -1

        return Transaction(
            account=self.account_id,
            date=txn_date,
            amount=amount,
            payee=payee,
            notes="via email",
            imported_id=msg["Message-ID"],
        )
class BMOParser(TransactionParser):
    """Parser for BMO withdrawal/deposit alert emails."""

    # Groups: 1 = "withdrawal" or "deposit", 2 = amount, 3 = last three
    # digits of the account. re.MULTILINE only affects ^ and $, which this
    # pattern does not use; the flag is kept so the compiled pattern is
    # unchanged.
    EXTRACT_RE = re.compile(
        r"We want to let you know that a (withdrawal|deposit) of\s+\$([0-9,]+\.\d{2})\s+has been made (?:to|from) your account ending\s+in\s+(\d{3})",  # noqa: E501
        flags=re.MULTILINE,
    )

    def __init__(self, account_map: dict[int, UUID]):
        """Store the mapping from account suffix to account identifier.

        Args:
            account_map: Maps the integer value of the account's trailing
                digits (as quoted in the email) to the account to book
                against.
        """
        self._account_map = account_map

    def match(self, msg: Message) -> bool:
        """Return True for any message sent by ``bmoalerts@bmo.com``.

        Args:
            msg: The email message to evaluate.

        Returns:
            True when the ``From`` header is exactly the BMO alerts address.
        """
        return msg["From"] == "bmoalerts@bmo.com"

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """Extract the withdrawal or deposit described in the alert body.

        The HTML part is preferred; its markup is stripped with
        BeautifulSoup before the regex is applied. The transaction date is
        taken from the email's ``Date`` header.

        Args:
            msg: The email message to parse.

        Returns:
            A Transaction (negative amount for a withdrawal, positive for a
            deposit, empty payee), or None when the account suffix is not
            present in the account map.

        Raises:
            TransactionParsingFailed: If the message has no body or content,
                the text does not match ``EXTRACT_RE``, or the ``Date``
                header is missing or cannot be parsed.
        """
        body = msg.get_body(preferencelist=("html", "plain"))
        if body is None:
            raise TransactionParsingFailed("No body of message found")
        content = body.get_content()
        if content is None:
            raise TransactionParsingFailed("No content of message found")

        text = BeautifulSoup(content, "html.parser").get_text()
        found = self.EXTRACT_RE.search(text)
        if found is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        # int() drops leading zeros, so a suffix of "012" is looked up as 12.
        account_ref = int(found[3])
        account_id = self._account_map.get(account_ref)
        if account_id is None:
            # Resolve the account before touching the Date header so that an
            # alert for an untracked account is skipped, not failed.
            info("Account %s not in account map", account_ref)
            return None

        date_header = msg["Date"]
        if date_header is None:
            raise TransactionParsingFailed("No Date header found")
        try:
            txn_date = parse_email_time(date_header).date()
        except ValueError as err:
            raise TransactionParsingFailed(
                f"Unparseable Date header: {date_header!r}"
            ) from err

        amount = Decimal(found[2].replace(",", ""))
        if found[1] == "withdrawal":
            amount = amount * -1

        return Transaction(
            account=account_id,
            date=txn_date,
            amount=amount,
            payee="",
            notes="via email",
            imported_id=msg["Message-ID"],
        )
class CIBCParser(TransactionParser):
PAYMENT_EXTRACT_RE = re.compile(
r"recently received a \$([0-9,]+\.\d{2}) payment to your [^<]+ ending in (\d{4})",
flags=re.MULTILINE,
)
def __init__(self, account_map: dict[int, UUID]):
self._account_map = account_map
def match(self, msg: Message) -> bool:
return msg["From"] == "CIBC Banking <mailbox.noreply@cibc.com>"
def extract_payment(self, msg: EmailMessage):
body = msg.get_body(preferencelist=("html", "plain"))
if body is None:
raise TransactionParsingFailed("No body of message found")
content = body.get_content()
if content is None:
raise TransactionParsingFailed("No content of message found")
matches = self.PAYMENT_EXTRACT_RE.search(content)
if matches is None:
raise TransactionParsingFailed("no matches for extraction RE")
amount = Decimal(matches[1].replace(",", ""))
account_ref = int(matches[2])
date = parse_email_time(msg["Date"]).date()
if account_ref not in self._account_map:
info("Account %s not in account map", account_ref)
return None
account_id = self._account_map[account_ref]
return Transaction(
account=account_id,
date=date,
amount=amount,
payee="",
notes="via email",
imported_id=msg["Message-ID"],
)
def extract(self, msg: EmailMessage) -> Optional[Transaction]:
match msg["Subject"]:
case "New payment to your credit card":
return self.extract_payment(msg)
return None