From e0cc0383edea7c0e1bfb339722785b92a0b0ff04 Mon Sep 17 00:00:00 2001
From: Thomas Steinacher
Date: Thu, 27 Jun 2024 18:00:13 -0500
Subject: [PATCH] Improve Outlook forward style

---
 quotequail/_html.py     |  3 ++-
 quotequail/_patterns.py | 12 ++++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/quotequail/_html.py b/quotequail/_html.py
index 15777b5..b4c8b12 100644
--- a/quotequail/_html.py
+++ b/quotequail/_html.py
@@ -383,7 +383,8 @@ def _trim_spaces(text: str) -> str:
         is_forward = (
             is_block
             and state == BEGIN
-            and el.attrib.get("style") in FORWARD_STYLES
+            and (style := el.attrib.get("style"))
+            and any(style_re.match(style) for style_re in FORWARD_STYLES)
         )
 
         if is_block or line_break:
diff --git a/quotequail/_patterns.py b/quotequail/_patterns.py
index 788adf8..6653ff6 100644
--- a/quotequail/_patterns.py
+++ b/quotequail/_patterns.py
@@ -48,8 +48,16 @@
 )
 
 FORWARD_STYLES = [
-    # Outlook
-    "border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in",
+    # Outlook starts forwards directly with the "From: " line but we can catch
+    # it with the header to avoid falsely identifying a forward.
+    # - #B5C4DF and #E1E1E1 are known border colors; any 6 hex digits match.
+    # - "padding:3.0pt 0in 0in 0in" and "padding:3.0pt 0cm 0cm 0cm" are known
+    #   paddings.
+    re.compile(
+        r"^border:none;border-top:solid #[0-9a-fA-F]{6} 1\.0pt;"
+        r"padding:3\.0pt 0(in|cm) 0(in|cm) 0(in|cm)$",
+        re.UNICODE,
+    ),
 ]
 
 HEADER_RE = re.compile(r"\*?([-\w ]+):\*?(.*)$", re.UNICODE)