actual-imap-poll/parsers.py

115 lines
3.4 KiB
Python

import re
from abc import ABC
from abc import abstractmethod
from datetime import datetime
from decimal import Decimal
from email.message import EmailMessage
from email.message import Message
from uuid import UUID
from model import Transaction
class TransactionParser(ABC):
    """Interface for turning a notification email into a ``Transaction``.

    Subclasses implement ``match`` to recognise the emails they handle and
    ``extract`` to pull the transaction details out of such an email.
    """

    @abstractmethod
    def match(self, msg: Message) -> bool:
        """Report whether this parser is responsible for ``msg``.

        Args:
            msg: The email message to inspect.

        Returns:
            True when the message satisfies this parser's criteria,
            False otherwise.
        """

    @abstractmethod
    def extract(self, msg: EmailMessage) -> Transaction:
        """Build a ``Transaction`` from the contents of ``msg``.

        Args:
            msg: The email message to parse.

        Returns:
            A Transaction populated with the extracted details.

        Raises:
            TransactionParsingFailed: If the message cannot be parsed.
        """
class TransactionParsingFailed(Exception):
    """Raised when transaction details cannot be extracted from an email."""
class RogersBankParser(TransactionParser):
    """Parser for Rogers Bank "Purchase amount alert" emails."""

    # Capture groups, in order: dollar amount, date (e.g. "Jan 5, 2024"),
    # payee.  The month uses [A-Za-z]: the range [A-z] would also accept the
    # ASCII punctuation that sits between "Z" and "a" ([ \ ] ^ _ `).
    EXTRACT_RE = re.compile(
        r"Attempt of \$(\d+\.\d{2}) was made on "
        r"([A-Za-z]{3} \d{1,2}, \d{4})[^<]*at ([^<]+) in"
    )

    def __init__(self, account_id: UUID):
        """Remember the account that extracted transactions are booked to.

        Args:
            account_id: Identifier stored as ``account`` on every
                Transaction this parser produces.
        """
        self.account_id = account_id

    def match(self, msg: Message) -> bool:
        """Report whether ``msg`` is a Rogers Bank purchase alert.

        Args:
            msg: The email message to inspect.

        Returns:
            True only when both the ``From`` and ``Subject`` headers equal
            the expected values exactly.
        """
        return (
            msg["From"] == "Rogers Bank <onlineservices@RogersBank.com>"
            and msg["Subject"] == "Purchase amount alert"
        )

    def extract(self, msg: EmailMessage) -> Transaction:
        """Extract amount, date and payee from a Rogers Bank alert.

        Args:
            msg: The email message to parse.

        Returns:
            A Transaction for ``self.account_id`` whose ``imported_id`` is
            the email's ``Message-ID`` header.

        Raises:
            TransactionParsingFailed: If the message has no body, the body
                does not match ``EXTRACT_RE``, or the matched date text is
                not a valid date.
        """
        body = msg.get_body()
        if body is None:
            raise TransactionParsingFailed("No body of message found")

        # The pattern is searched in the serialized body part.
        matches = self.EXTRACT_RE.search(body.as_string())
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        amount_raw, date_raw, payee = matches.groups()

        try:
            # %b is locale-dependent; three letters alone do not guarantee a
            # month abbreviation, so a failed parse is reported as a parsing
            # failure rather than leaking a bare ValueError.
            date = datetime.strptime(date_raw, "%b %d, %Y").date()
        except ValueError as exc:
            raise TransactionParsingFailed(
                f"Unrecognised transaction date: {date_raw!r}"
            ) from exc

        return Transaction(
            account=self.account_id,
            date=date,
            amount=Decimal(amount_raw),
            payee=payee,
            notes="via email",
            imported_id=msg["Message-ID"],
        )
class MBNAParser(TransactionParser):
    """Parser for "MBNA - Transaction Alert" emails."""

    # Capture groups, in order: dollar amount, payee, time of day, AM/PM,
    # date (YYYY-MM-DD).  Only groups 1, 2 and 5 are used by ``extract``.
    EXTRACT_RE = re.compile(
        r"A purchase of \$(\d+\.\d{2}) from ([^<]+) was made at (\d{1,2}:\d{2} (AM|PM)) UTC on (\d{4}-\d{2}-\d{2})"  # noqa: E501
    )

    def __init__(self, account_id: UUID):
        """Remember the account that extracted transactions are booked to.

        Args:
            account_id: Identifier stored as ``account`` on every
                Transaction this parser produces.
        """
        self.account_id = account_id

    def match(self, msg: Message) -> bool:
        """Report whether ``msg`` is an MBNA transaction alert.

        Args:
            msg: The email message to inspect.

        Returns:
            True only when both the ``From`` and ``Subject`` headers equal
            the expected values exactly.
        """
        return (
            msg["From"] == "MBNA Notifications <noreply@mbna.ca>"
            and msg["Subject"] == "MBNA - Transaction Alert"
        )

    def extract(self, msg: EmailMessage) -> Transaction:
        """Extract amount, payee and date from an MBNA alert.

        Args:
            msg: The email message to parse.

        Returns:
            A Transaction for ``self.account_id`` whose ``imported_id`` is
            the email's ``Message-ID`` header.

        Raises:
            TransactionParsingFailed: If the message has no body, the body
                does not match ``EXTRACT_RE``, or the matched date text is
                not a valid calendar date.
        """
        body = msg.get_body()
        if body is None:
            raise TransactionParsingFailed("No body of message found")

        # The pattern is searched in the serialized body part.
        matches = self.EXTRACT_RE.search(body.as_string())
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        date_raw = matches[5]
        try:
            # Hand Transaction a date object, as RogersBankParser does,
            # instead of the raw string.  The pattern only checks the digit
            # layout, so e.g. "2024-13-45" is rejected here.
            # NOTE(review): the email states the time in UTC, so this is
            # presumably the UTC calendar date - confirm that is intended.
            txn_date = datetime.strptime(date_raw, "%Y-%m-%d").date()
        except ValueError as exc:
            raise TransactionParsingFailed(
                f"Unrecognised transaction date: {date_raw!r}"
            ) from exc

        return Transaction(
            account=self.account_id,
            date=txn_date,
            amount=Decimal(matches[1]),
            payee=matches[2],
            notes="via email",
            imported_id=msg["Message-ID"],
        )