add bmo parser, some refactors

This commit is contained in:
Keenan Tims 2025-04-24 18:17:20 -07:00
parent bb1fcb3d94
commit d1845cc5d9
6 changed files with 94 additions and 43 deletions

View File

@ -4,12 +4,16 @@ FROM node:20-alpine
# Create app directory
WORKDIR /app
RUN apk add python3
RUN apk add python3 py3-pip
# Copy package.json and install dependencies
COPY package*.json ./
RUN npm install --omit=dev
# Copy requirements.txt and install dependencies
COPY requirements.txt ./
RUN pip3 install --no-cache-dir --break-system-packages -r requirements.txt
# Copy the rest of the app
COPY . .

View File

@ -58,7 +58,7 @@ class MinimalBankParser(TransactionParser):
return msg["From"] == "Minimal Bank <alerts@minimalbank.com>" and "Transaction Alert" in msg["Subject"]
def extract(self, msg):
body = msg.get_body().as_string()
body = msg.get_content()
amount = ... # Extract amount from body
date = ... # Extract date from body
payee = ... # Extract payee from body

View File

@ -1,12 +1,14 @@
import re
from abc import ABC
from abc import abstractmethod
from datetime import datetime
from abc import ABC, abstractmethod
from datetime import date, datetime
from decimal import Decimal
from email.message import EmailMessage
from email.message import Message
from email.message import EmailMessage, Message
from logging import info
from typing import Optional
from uuid import UUID
from bs4 import BeautifulSoup
from model import Transaction
@ -25,7 +27,7 @@ class TransactionParser(ABC):
pass
@abstractmethod
def extract(self, msg: EmailMessage) -> Transaction:
def extract(self, msg: EmailMessage) -> Optional[Transaction]:
"""
Extracts transaction details from the given email message.
@ -59,11 +61,11 @@ class RogersBankParser(TransactionParser):
and msg["Subject"] == "Purchase amount alert"
)
def extract(self, msg: EmailMessage) -> Transaction:
body = msg.get_body()
def extract(self, msg: EmailMessage) -> Optional[Transaction]:
body = msg.get_content()
if body is None:
raise TransactionParsingFailed("No body of message found")
matches = self.EXTRACT_RE.search(body.as_string())
matches = self.EXTRACT_RE.search(body)
if matches is None:
raise TransactionParsingFailed("No matches for extraction RE")
amount = Decimal(matches[1])
@ -88,27 +90,68 @@ class MBNAParser(TransactionParser):
def __init__(self, account_id: UUID):
self.account_id = account_id
def match(self, msg: Message):
def match(self, msg: Message) -> bool:
return (
msg["From"] == "MBNA Notifications <noreply@mbna.ca>"
and msg["Subject"] == "MBNA - Transaction Alert"
)
def extract(self, msg: EmailMessage) -> Transaction:
body = msg.get_body()
def extract(self, msg: EmailMessage) -> Optional[Transaction]:
body = msg.get_content()
if body is None:
raise TransactionParsingFailed("No body of message found")
matches = self.EXTRACT_RE.search(body.as_string())
matches = self.EXTRACT_RE.search(body)
if matches is None:
raise TransactionParsingFailed("No matches for extraction RE")
amount = Decimal(matches[1])
payee = matches[2]
date = matches[5]
date_raw = matches[5]
return Transaction(
account=self.account_id,
date=date,
date=date.fromisoformat(date_raw),
amount=amount,
payee=payee,
notes="via email",
imported_id=msg["Message-ID"],
)
class BMOParser(TransactionParser):
    """Parser for BMO withdrawal/deposit alert emails.

    The alert text is located with ``EXTRACT_RE`` after stripping HTML markup.
    The account is identified by the three digits following "account ending
    in", which are looked up in the ``account_map`` given to the constructor.
    """

    # Groups: 1 = "withdrawal" or "deposit", 2 = amount (thousands separators
    # allowed, e.g. "1,234.56"), 3 = the three trailing account digits.
    EXTRACT_RE = re.compile(
        r"We want to let you know that a (withdrawal|deposit) of\s+\$(\d+(?:,\d{3})*\.\d{2})\s+has been made (?:to|from) your account ending\s+in\s+(\d{3})",  # noqa: E501
        flags=re.MULTILINE,
    )

    def __init__(self, account_map: dict[int, UUID]):
        """Store the mapping used to resolve accounts.

        Args:
            account_map: Maps the trailing account digits, converted with
                ``int()``, to the target account UUID. Because of the
                ``int()`` conversion a suffix such as "012" is looked up
                as ``12``.
        """
        self._account_map = account_map

    def match(self, msg: Message) -> bool:
        """Return True when the From header is exactly ``bmoalerts@bmo.com``."""
        return msg["From"] == "bmoalerts@bmo.com"

    def extract(self, msg: EmailMessage) -> Optional[Transaction]:
        """Build a Transaction from a BMO alert email.

        Withdrawals produce a negative amount, deposits a positive one. The
        transaction date is the calendar date of the email's Date header.

        Args:
            msg: The alert email.

        Returns:
            The parsed Transaction, or None when the account digits found in
            the message are not present in the account map.

        Raises:
            TransactionParsingFailed: If the message has no text body, the
                alert text is not found, or the Date header is missing or
                cannot be parsed.
        """
        # Function-scope import so this block does not depend on the file's
        # top-level import list.
        from email.utils import parsedate_to_datetime

        # get_content() raises KeyError on multipart containers; pick the
        # HTML (or plain text) part explicitly. For a single-part text
        # message get_body() returns the message itself.
        part = msg.get_body(preferencelist=("html", "plain"))
        if part is None:
            raise TransactionParsingFailed("No body of message found")
        body = part.get_content()

        soup = BeautifulSoup(body, "html.parser")
        matches = self.EXTRACT_RE.search(soup.get_text())
        if matches is None:
            raise TransactionParsingFailed("No matches for extraction RE")

        amount = Decimal(matches[2].replace(",", ""))
        if matches[1] == "withdrawal":
            amount = -amount

        date_raw = msg["Date"]
        if date_raw is None:
            raise TransactionParsingFailed("No Date header found")
        try:
            # Accepts the RFC 5322 variants a fixed strptime format rejects
            # (missing weekday, trailing "(PDT)" comment, "GMT" zone).
            posted_on = parsedate_to_datetime(str(date_raw)).date()
        except (TypeError, ValueError) as err:
            raise TransactionParsingFailed(
                f"Unparseable Date header: {date_raw}"
            ) from err

        account_ref = int(matches[3])
        account_id = self._account_map.get(account_ref)
        if account_id is None:
            info("Account %s not in account map", account_ref)
            return None

        return Transaction(
            account=account_id,
            date=posted_on,
            amount=amount,
            payee="",
            notes="via email",
            imported_id=msg["Message-ID"],
        )

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
beautifulsoup4

View File

@ -1,2 +1,2 @@
[flake8]
max-line-length = 100
max-line-length = 100

View File

@ -6,20 +6,13 @@ import logging
import os
import ssl
from dataclasses import dataclass
from email.message import EmailMessage
from imaplib import IMAP4
from logging import debug
from logging import error
from logging import info
from logging import warning
from logging import debug, error, info, warning
from os import getenv
from pprint import pprint
from typing import cast
from model import Transaction
from parsers import TransactionParsingFailed
CONFIG_PATH = getenv("CONFIG_PATH", "/data/config.py")
ACTUAL_PATH = "./cli.js"
TIMEOUT = 30
@ -33,6 +26,8 @@ async def ticker(interval: float):
def load_config_module(path: str):
    """Load the Python file at *path* as a module named ``config``.

    The file's top-level code is executed as part of loading.

    Args:
        path: Filesystem location of the configuration module.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be determined for *path*.
    """
    module_spec = importlib.util.spec_from_file_location("config", path)
    loader = None if module_spec is None else module_spec.loader
    if loader is None:
        raise ImportError(f"Could not find config module at {path}")

    module = importlib.util.module_from_spec(module_spec)
    loader.exec_module(module)
    return module
@ -43,12 +38,12 @@ class AppConfig:
@staticmethod
def from_environ() -> "AppConfig":
    """Build an AppConfig from ``IMAP_*`` environment variables.

    Required: IMAP_SERVER, IMAP_USER, IMAP_PASS.
    Optional (default): IMAP_PORT (143), IMAP_MAILBOX ("INBOX"),
    IMAP_INTERVAL (300), IMAP_STARTTLS (enabled).

    IMAP_STARTTLS is disabled by an empty value or by "0", "false", "no"
    or "off" (case-insensitive); any other value enables it.

    Raises:
        KeyError: If a required variable is unset.
        ValueError: If IMAP_PORT or IMAP_INTERVAL is not an integer.
    """
    # bool() of any non-empty string is True, so bool(getenv(...)) could
    # never be switched off with "false" or "0"; compare the text instead.
    starttls_raw = getenv("IMAP_STARTTLS", "true")
    starttls = starttls_raw.strip().lower() not in {"", "0", "false", "no", "off"}

    return AppConfig(
        imap_server=os.environ["IMAP_SERVER"],
        imap_port=int(getenv("IMAP_PORT", 143)),
        imap_user=os.environ["IMAP_USER"],
        imap_pass=os.environ["IMAP_PASS"],
        imap_mailbox=getenv("IMAP_MAILBOX", "INBOX"),
        imap_interval=int(getenv("IMAP_INTERVAL", 300)),
        imap_starttls=starttls,
    )
@ -71,10 +66,10 @@ class App:
cmd = (
ACTUAL_PATH
+ f' -a "{t.account}"'
+ f' -p "{t.payee}"'
+ (f' -p "{t.payee}"' if t.payee else "")
+ f' -m "{t.amount}"'
+ f' -d "{t.date}"'
f' -n "{t.notes}"'
+ (f' -n "{t.notes}"' if t.notes else "")
)
debug("Actual command: %s", cmd)
proc = await asyncio.create_subprocess_shell(
@ -83,16 +78,13 @@ class App:
stderr=asyncio.subprocess.STDOUT,
stdout=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
stdout, _stderr = await proc.communicate()
if proc.returncode != 0:
error("Submitting to actual failed: %s", stdout)
async def process_message(self, msg_b: bytes):
debug("parsing message")
msg = cast(
EmailMessage, email.message_from_bytes(msg_b, policy=email.policy.default)
)
pprint(msg)
msg = email.message_from_bytes(msg_b, policy=email.policy.default)
info(
"Found message from %s to %s subject %s",
msg.get("From", "<unknown>"),
@ -106,8 +98,11 @@ class App:
if parser.match(msg):
info("Parser %s claimed message", type(parser).__name__)
trans = parser.extract(msg)
info("Submitting transaction to Actual: %s", trans)
await self.submit_transaction(trans)
if trans is not None:
info("Submitting transaction to Actual: %s", trans)
await self.submit_transaction(trans)
else:
warning("Parser %s returned None", type(parser).__name__)
except TransactionParsingFailed as e:
warning("Unable to parse message %s", e)
except Exception as e:
@ -121,15 +116,23 @@ class App:
M.starttls(context)
M.login(self._config.imap_user, self._config.imap_pass)
M.select(self._config.imap_mailbox)
status, m_set = M.search(None, "UNSEEN")
_status, m_set = M.search(None, "UNSEEN")
for msg_id in m_set[0].split():
debug("Retrieving msg id %s", msg_id)
status, msg = M.fetch(msg_id, "(RFC822)")
await self.process_message(msg[0][1])
_status, msg = M.fetch(msg_id, "(RFC822)")
if _status != "OK" or msg[0] is None:
error("Unable to fetch message %s", msg_id)
continue
msg_body = msg[0][1]
if isinstance(msg_body, int):
error("Unable to fetch message %s", msg_id)
continue
debug("Processing message %s", msg_id)
await self.process_message(msg_body)
async def run(self):
async for tick in ticker(self._config.imap_interval):
async for _ in ticker(self._config.imap_interval):
await self.poll_imap()