add bmo parser, some refactors

This commit is contained in:
Keenan Tims 2025-04-24 18:17:20 -07:00
parent bb1fcb3d94
commit d1845cc5d9
6 changed files with 94 additions and 43 deletions

View File

@ -4,12 +4,16 @@ FROM node:20-alpine
# Create app directory # Create app directory
WORKDIR /app WORKDIR /app
RUN apk add python3 RUN apk add python3 py3-pip
# Copy package.json and install dependencies # Copy package.json and install dependencies
COPY package*.json ./ COPY package*.json ./
RUN npm install --omit=dev RUN npm install --omit=dev
# Copy requirements.txt and install dependencies
COPY requirements.txt ./
RUN pip3 install --no-cache-dir --break-system-packages -r requirements.txt
# Copy the rest of the app # Copy the rest of the app
COPY . . COPY . .

View File

@ -58,7 +58,7 @@ class MinimalBankParser(TransactionParser):
return msg["From"] == "Minimal Bank <alerts@minimalbank.com>" and "Transaction Alert" in msg["Subject"] return msg["From"] == "Minimal Bank <alerts@minimalbank.com>" and "Transaction Alert" in msg["Subject"]
def extract(self, msg): def extract(self, msg):
body = msg.get_body().as_string() body = msg.get_content()
amount = ... # Extract amount from body amount = ... # Extract amount from body
date = ... # Extract date from body date = ... # Extract date from body
payee = ... # Extract payee from body payee = ... # Extract payee from body

View File

@ -1,12 +1,14 @@
import re import re
from abc import ABC from abc import ABC, abstractmethod
from abc import abstractmethod from datetime import date, datetime
from datetime import datetime
from decimal import Decimal from decimal import Decimal
from email.message import EmailMessage from email.message import EmailMessage, Message
from email.message import Message from logging import info
from typing import Optional
from uuid import UUID from uuid import UUID
from bs4 import BeautifulSoup
from model import Transaction from model import Transaction
@ -25,7 +27,7 @@ class TransactionParser(ABC):
pass pass
@abstractmethod @abstractmethod
def extract(self, msg: EmailMessage) -> Transaction: def extract(self, msg: EmailMessage) -> Optional[Transaction]:
""" """
Extracts transaction details from the given email message. Extracts transaction details from the given email message.
@ -59,11 +61,11 @@ class RogersBankParser(TransactionParser):
and msg["Subject"] == "Purchase amount alert" and msg["Subject"] == "Purchase amount alert"
) )
def extract(self, msg: EmailMessage) -> Transaction: def extract(self, msg: EmailMessage) -> Optional[Transaction]:
body = msg.get_body() body = msg.get_content()
if body is None: if body is None:
raise TransactionParsingFailed("No body of message found") raise TransactionParsingFailed("No body of message found")
matches = self.EXTRACT_RE.search(body.as_string()) matches = self.EXTRACT_RE.search(body)
if matches is None: if matches is None:
raise TransactionParsingFailed("No matches for extraction RE") raise TransactionParsingFailed("No matches for extraction RE")
amount = Decimal(matches[1]) amount = Decimal(matches[1])
@ -88,27 +90,68 @@ class MBNAParser(TransactionParser):
def __init__(self, account_id: UUID): def __init__(self, account_id: UUID):
self.account_id = account_id self.account_id = account_id
def match(self, msg: Message): def match(self, msg: Message) -> bool:
return ( return (
msg["From"] == "MBNA Notifications <noreply@mbna.ca>" msg["From"] == "MBNA Notifications <noreply@mbna.ca>"
and msg["Subject"] == "MBNA - Transaction Alert" and msg["Subject"] == "MBNA - Transaction Alert"
) )
def extract(self, msg: EmailMessage) -> Transaction: def extract(self, msg: EmailMessage) -> Optional[Transaction]:
body = msg.get_body() body = msg.get_content()
if body is None: if body is None:
raise TransactionParsingFailed("No body of message found") raise TransactionParsingFailed("No body of message found")
matches = self.EXTRACT_RE.search(body.as_string()) matches = self.EXTRACT_RE.search(body)
if matches is None: if matches is None:
raise TransactionParsingFailed("No matches for extraction RE") raise TransactionParsingFailed("No matches for extraction RE")
amount = Decimal(matches[1]) amount = Decimal(matches[1])
payee = matches[2] payee = matches[2]
date = matches[5] date_raw = matches[5]
return Transaction( return Transaction(
account=self.account_id, account=self.account_id,
date=date, date=date.fromisoformat(date_raw),
amount=amount, amount=amount,
payee=payee, payee=payee,
notes="via email", notes="via email",
imported_id=msg["Message-ID"], imported_id=msg["Message-ID"],
) )
class BMOParser(TransactionParser):
    """Parse BMO withdrawal/deposit alert emails into transactions.

    The alert text names only the trailing digits of the affected account,
    so the parser is configured with a map from those digits (as an ``int``)
    to the account UUID the transaction should be filed under.
    """

    # Groups: 1 = "withdrawal" or "deposit", 2 = dollar amount (thousands
    # separators allowed, e.g. "1,234.56"), 3 = the three account digits
    # quoted in the alert.
    EXTRACT_RE = re.compile(
        r"We want to let you know that a (withdrawal|deposit) of\s+\$(\d[\d,]*\.\d{2})\s+has been made (?:to|from) your account ending\s+in\s+(\d{3})",  # noqa: E501
        flags=re.MULTILINE,
    )

    def __init__(self, account_map: dict[int, UUID]):
        """Store the mapping of account-ending digits to account UUIDs."""
        self._account_map = account_map

    def match(self, msg: Message) -> bool:
        """Return True when the message's From header is the BMO alert sender."""
        return msg["From"] == "bmoalerts@bmo.com"

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """Build a Transaction from a BMO alert message.

        Returns:
            The parsed transaction, or ``None`` when the account digits in
            the alert are not present in the configured account map.

        Raises:
            TransactionParsingFailed: if the message has no text body, the
                alert sentence is not found, or the Date header is missing
                or cannot be parsed.
        """
        # Local import: this block cannot rely on the module's import list.
        from email.utils import parsedate_to_datetime

        # get_content() on a multipart container raises KeyError, so pick the
        # text part first. "related" is deliberately left out of the
        # preference list so the result is always a text/* leaf part. For a
        # non-multipart text message this returns the message itself.
        part = msg.get_body(preferencelist=("html", "plain"))
        if part is None:
            raise TransactionParsingFailed("No body of message found")
        body = part.get_content()

        # Strip markup so the regex runs against the visible text only.
        soup = BeautifulSoup(body, "html.parser")
        matches = self.EXTRACT_RE.search(soup.get_text())
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        # Decimal does not accept thousands separators; drop them first.
        amount = Decimal(matches[2].replace(",", ""))
        if matches[1] == "withdrawal":
            # Withdrawals are recorded as negative amounts.
            amount *= -1

        # The alert body carries no date, so the message's Date header is
        # used. parsedate_to_datetime accepts the RFC 5322 variants (optional
        # weekday, trailing comments) that a fixed strptime format rejects.
        date_raw = msg["Date"]
        if date_raw is None:
            raise TransactionParsingFailed("No Date header found")
        try:
            txn_date = parsedate_to_datetime(str(date_raw)).date()
        except (TypeError, ValueError) as exc:
            raise TransactionParsingFailed(
                f"Unparseable Date header: {date_raw}"
            ) from exc

        account_ref = int(matches[3])
        account_id = self._account_map.get(account_ref)
        if account_id is None:
            info("Account %s not in account map", account_ref)
            return None

        return Transaction(
            account=account_id,
            date=txn_date,
            amount=amount,
            payee="",
            notes="via email",
            imported_id=msg["Message-ID"],
        )

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
beautifulsoup4

View File

@ -6,20 +6,13 @@ import logging
import os import os
import ssl import ssl
from dataclasses import dataclass from dataclasses import dataclass
from email.message import EmailMessage
from imaplib import IMAP4 from imaplib import IMAP4
from logging import debug from logging import debug, error, info, warning
from logging import error
from logging import info
from logging import warning
from os import getenv from os import getenv
from pprint import pprint
from typing import cast
from model import Transaction from model import Transaction
from parsers import TransactionParsingFailed from parsers import TransactionParsingFailed
CONFIG_PATH = getenv("CONFIG_PATH", "/data/config.py") CONFIG_PATH = getenv("CONFIG_PATH", "/data/config.py")
ACTUAL_PATH = "./cli.js" ACTUAL_PATH = "./cli.js"
TIMEOUT = 30 TIMEOUT = 30
@ -33,6 +26,8 @@ async def ticker(interval: float):
def load_config_module(path: str): def load_config_module(path: str):
spec = importlib.util.spec_from_file_location("config", path) spec = importlib.util.spec_from_file_location("config", path)
if spec is None or spec.loader is None:
raise ImportError(f"Could not find config module at {path}")
config = importlib.util.module_from_spec(spec) config = importlib.util.module_from_spec(spec)
spec.loader.exec_module(config) spec.loader.exec_module(config)
return config return config
@ -43,12 +38,12 @@ class AppConfig:
@staticmethod @staticmethod
def from_environ() -> "AppConfig": def from_environ() -> "AppConfig":
return AppConfig( return AppConfig(
imap_server=getenv("IMAP_SERVER"), imap_server=os.environ["IMAP_SERVER"],
imap_port=int(getenv("IMAP_PORT", 143)), imap_port=int(getenv("IMAP_PORT", 143)),
imap_user=getenv("IMAP_USER"), imap_user=os.environ["IMAP_USER"],
imap_pass=getenv("IMAP_PASS"), imap_pass=os.environ["IMAP_PASS"],
imap_mailbox=getenv("IMAP_MAILBOX", "INBOX"), imap_mailbox=getenv("IMAP_MAILBOX", "INBOX"),
imap_interval=float(getenv("IMAP_INTERVAL", 300)), imap_interval=int(getenv("IMAP_INTERVAL", 300)),
imap_starttls=bool(getenv("IMAP_STARTTLS", True)), imap_starttls=bool(getenv("IMAP_STARTTLS", True)),
) )
@ -71,10 +66,10 @@ class App:
cmd = ( cmd = (
ACTUAL_PATH ACTUAL_PATH
+ f' -a "{t.account}"' + f' -a "{t.account}"'
+ f' -p "{t.payee}"' + (f' -p "{t.payee}"' if t.payee else "")
+ f' -m "{t.amount}"' + f' -m "{t.amount}"'
+ f' -d "{t.date}"' + f' -d "{t.date}"'
f' -n "{t.notes}"' + (f' -n "{t.notes}"' if t.notes else "")
) )
debug("Actual command: %s", cmd) debug("Actual command: %s", cmd)
proc = await asyncio.create_subprocess_shell( proc = await asyncio.create_subprocess_shell(
@ -83,16 +78,13 @@ class App:
stderr=asyncio.subprocess.STDOUT, stderr=asyncio.subprocess.STDOUT,
stdout=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE,
) )
stdout, stderr = await proc.communicate() stdout, _stderr = await proc.communicate()
if proc.returncode != 0: if proc.returncode != 0:
error("Submitting to actual failed: %s", stdout) error("Submitting to actual failed: %s", stdout)
async def process_message(self, msg_b: bytes): async def process_message(self, msg_b: bytes):
debug("parsing message") debug("parsing message")
msg = cast( msg = email.message_from_bytes(msg_b, policy=email.policy.default)
EmailMessage, email.message_from_bytes(msg_b, policy=email.policy.default)
)
pprint(msg)
info( info(
"Found message from %s to %s subject %s", "Found message from %s to %s subject %s",
msg.get("From", "<unknown>"), msg.get("From", "<unknown>"),
@ -106,8 +98,11 @@ class App:
if parser.match(msg): if parser.match(msg):
info("Parser %s claimed message", type(parser).__name__) info("Parser %s claimed message", type(parser).__name__)
trans = parser.extract(msg) trans = parser.extract(msg)
if trans is not None:
info("Submitting transaction to Actual: %s", trans) info("Submitting transaction to Actual: %s", trans)
await self.submit_transaction(trans) await self.submit_transaction(trans)
else:
warning("Parser %s returned None", type(parser).__name__)
except TransactionParsingFailed as e: except TransactionParsingFailed as e:
warning("Unable to parse message %s", e) warning("Unable to parse message %s", e)
except Exception as e: except Exception as e:
@ -121,15 +116,23 @@ class App:
M.starttls(context) M.starttls(context)
M.login(self._config.imap_user, self._config.imap_pass) M.login(self._config.imap_user, self._config.imap_pass)
M.select(self._config.imap_mailbox) M.select(self._config.imap_mailbox)
status, m_set = M.search(None, "UNSEEN") _status, m_set = M.search(None, "UNSEEN")
for msg_id in m_set[0].split(): for msg_id in m_set[0].split():
debug("Retrieving msg id %s", msg_id) debug("Retrieving msg id %s", msg_id)
status, msg = M.fetch(msg_id, "(RFC822)") _status, msg = M.fetch(msg_id, "(RFC822)")
await self.process_message(msg[0][1]) if _status != "OK" or msg[0] is None:
error("Unable to fetch message %s", msg_id)
continue
msg_body = msg[0][1]
if isinstance(msg_body, int):
error("Unable to fetch message %s", msg_id)
continue
debug("Processing message %s", msg_id)
await self.process_message(msg_body)
async def run(self): async def run(self):
async for tick in ticker(self._config.imap_interval): async for _ in ticker(self._config.imap_interval):
await self.poll_imap() await self.poll_imap()