Fix BMO parser for the new alert email format; fix lint warnings
This commit is contained in:
@@ -58,6 +58,18 @@ class TransactionParser(ABC):
|
||||
raise TransactionParsingFailed("No content of message found")
|
||||
return content
|
||||
|
||||
@staticmethod
def strip_html(msg: EmailMessage) -> str:
    """Return the text of the message body with any HTML markup removed.

    The HTML part of the message is preferred; the plain-text part is
    used when no HTML part is available.

    Raises:
        TransactionParsingFailed: If the message has no HTML or plain
            body part, or that part yields no content.
    """
    part = msg.get_body(preferencelist=("html", "plain"))
    if part is None:
        raise TransactionParsingFailed("No HTML body of message found")

    raw = part.get_content()
    if raw is None:
        raise TransactionParsingFailed("No content of message found")

    # Parse the markup and keep only the text nodes.
    return BeautifulSoup(raw, "html.parser").get_text()
|
||||
|
||||
|
||||
class TransactionParsingFailed(Exception):
    """Raised when a transaction cannot be extracted from a message."""
|
||||
@@ -136,8 +148,8 @@ class MBNAParser(TransactionParser):
|
||||
|
||||
class BMOParser(TransactionParser):
|
||||
EXTRACT_RE = re.compile(
|
||||
r"We want to let you know that a (withdrawal|deposit) of\s+\$([0-9,]+\.\d{2})\s+has been made (?:to|from) your account ending\s+in\s+(\d{3})", # noqa: E501
|
||||
flags=re.MULTILINE,
|
||||
r"There was a (withdrawal|deposit).*Amount:\s*\$([0-9,]+\.\d{2}).*Account:\s*Ending in ([0-9]+)", # noqa: E501
|
||||
flags=re.DOTALL,
|
||||
)
|
||||
|
||||
def __init__(self, account_map: dict[int, UUID]):
|
||||
@@ -147,9 +159,8 @@ class BMOParser(TransactionParser):
|
||||
return msg["From"] == "bmoalerts@bmo.com"
|
||||
|
||||
def extract(self, msg: EmailMessage) -> Optional[Transaction]:
|
||||
content = self.get_content(msg)
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
matches = self.EXTRACT_RE.search(soup.get_text())
|
||||
content = self.strip_html(msg)
|
||||
matches = self.EXTRACT_RE.search(content)
|
||||
if matches is None:
|
||||
raise TransactionParsingFailed("No matches for extraction RE")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user