diff --git a/CHANGELOG.md b/CHANGELOG.md index fa31489..f0aebee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changes +## v0.4.0 +* Add `quote_intro_line` parameter to `quote` and `quote_html`. +* Modernize all tests. + ## v0.3.1 * Fix `unwrap_html` when no result was found. diff --git a/pyproject.toml b/pyproject.toml index 9276b47..cf1346e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,10 @@ max-complexity = 15 max-branches = 16 [tool.ruff.lint.per-file-ignores] -"tests/test_quotequail.py" = ["E501", "PT009"] +"tests/test_internal.py" = ["E501"] +"tests/test_quote.py" = ["E501"] +"tests/test_quote_html.py" = ["E501"] +"tests/test_unwrap.py" = ["E501"] "tests/test_unwrap_html.py" = ["E501"] [tool.mypy] diff --git a/quotequail/__init__.py b/quotequail/__init__.py index f42a60a..d01df4f 100644 --- a/quotequail/__init__.py +++ b/quotequail/__init__.py @@ -2,42 +2,66 @@ # a library that identifies quoted text in email messages from . import _internal, _patterns +from ._enums import Position -__version__ = "0.3.1" +__version__ = "0.4.0" __all__ = ["quote", "quote_html", "unwrap", "unwrap_html"] -def quote(text: str, limit: int = 1000) -> list[tuple[bool, str]]: +def quote( + text: str, *, limit: int = 1000, quote_intro_line: bool = False +) -> list[tuple[bool, str]]: """ - Take a plain text message as an argument, return a list of tuples. The - first argument of the tuple denotes whether the text should be expanded by - default. The second argument is the unmodified corresponding text. - - Example: [(True, 'expanded text'), (False, '> Some quoted text')] - - Unless the limit param is set to None, the text will automatically be - quoted starting at the line where the limit is reached. + Divide email body into quoted parts. + + Args: + text: Plain text message. + limit: If set, the text will automatically be quoted starting at the + line where the limit is reached. + quote_intro_line: Whether the line introducing the quoted text ("On ... 
+ wrote:" / "Begin forwarded message:") should be part of the quoted + text. + + Returns: + List of tuples: The first argument of the tuple denotes whether the + text should be expanded by default. The second argument is the + unmodified corresponding text. + + Example: [(True, 'expanded text'), (False, '> Some quoted text')] """ lines = text.split("\n") + position = Position.Begin if quote_intro_line else Position.End found = _internal.find_quote_position( - lines, _patterns.MAX_WRAP_LINES, limit + lines, + _patterns.MAX_WRAP_LINES, + limit=limit, + position=position, ) - if found is not None: - return [ - (True, "\n".join(lines[: found + 1])), - (False, "\n".join(lines[found + 1 :])), - ] + if found is None: + return [(True, text)] - return [(True, text)] + split_idx = found if quote_intro_line else found + 1 + return [ + (True, "\n".join(lines[:split_idx])), + (False, "\n".join(lines[split_idx:])), + ] -def quote_html(html: str, limit: int = 1000) -> list[tuple[bool, str]]: +def quote_html( + html: str, *, limit: int = 1000, quote_intro_line: bool = False +) -> list[tuple[bool, str]]: """ - Like quote(), but takes an HTML message as an argument. The limit param - represents the maximum number of lines to traverse until quoting the rest - of the markup. Lines are separated by block elements or
<br>. + Like quote(), but takes an HTML message as an argument. + + Args: + html: HTML message. + limit: Maximum number of lines to traverse until quoting the rest of + the markup. Lines are separated by block elements or
<br>. + quote_intro_line: Whether the line introducing the quoted text ("On ... + wrote:" / "Begin forwarded message:") should be part of the quoted + text. """ from . import _html @@ -45,16 +69,20 @@ def quote_html(html: str, limit: int = 1000) -> list[tuple[bool, str]]: start_refs, end_refs, lines = _html.get_line_info(tree, limit + 1) - found = _internal.find_quote_position(lines, 1, limit) + position = Position.Begin if quote_intro_line else Position.End + found = _internal.find_quote_position( + lines, 1, limit=limit, position=position + ) if found is None: # No quoting found and we're below limit. We're done. return [(True, _html.render_html_tree(tree))] + split_idx = found if quote_intro_line else found + 1 start_tree = _html.slice_tree( - tree, start_refs, end_refs, (0, found + 1), html_copy=html + tree, start_refs, end_refs, (0, split_idx), html_copy=html ) - end_tree = _html.slice_tree(tree, start_refs, end_refs, (found + 1, None)) + end_tree = _html.slice_tree(tree, start_refs, end_refs, (split_idx, None)) return [ (True, _html.render_html_tree(start_tree)), diff --git a/quotequail/_enums.py b/quotequail/_enums.py new file mode 100644 index 0000000..b259d64 --- /dev/null +++ b/quotequail/_enums.py @@ -0,0 +1,6 @@ +from enum import Enum + + +class Position(Enum): + Begin = "begin" + End = "end" diff --git a/quotequail/_html.py b/quotequail/_html.py index 5be2a53..3c34b47 100644 --- a/quotequail/_html.py +++ b/quotequail/_html.py @@ -1,5 +1,4 @@ # HTML utils -import enum from collections.abc import Iterator from typing import TYPE_CHECKING, TypeAlias @@ -9,14 +8,9 @@ if TYPE_CHECKING: from lxml.html import HtmlElement +from ._enums import Position from ._patterns import FORWARD_LINE, FORWARD_STYLES, MULTIPLE_WHITESPACE_RE - -class Position(enum.Enum): - Begin = "begin" - End = "end" - - Element: TypeAlias = "HtmlElement" ElementRef = tuple["Element", Position] diff --git a/quotequail/_internal.py b/quotequail/_internal.py index 0872fc8..79c9d90 100644 --- 
a/quotequail/_internal.py +++ b/quotequail/_internal.py @@ -1,3 +1,6 @@ +from typing_extensions import assert_never + +from ._enums import Position from ._patterns import ( COMPILED_PATTERN_MAP, HEADER_MAP, @@ -13,7 +16,10 @@ def find_pattern_on_line( - lines: list[str], n: int, max_wrap_lines: int + lines: list[str], + n: int, + max_wrap_lines: int, + position: Position, ) -> tuple[int, str] | None: """ Find a forward/reply pattern within the given lines on text on the given @@ -30,20 +36,42 @@ def find_pattern_on_line( match_line = join_wrapped_lines(lines[n : n + 1 + m]) if match_line.startswith(">"): match_line = match_line[1:].strip() + # If this line is blank, break out of the innermost loop + # at m == 0 so that if the quoting starts in the following + # line, we'll correctly detect the start of the quoting + # position. + if not match_line: + break if regex.match(match_line.strip()): - return n + m, typ + match position: + case Position.Begin: + return n, typ + case Position.End: + return n + m, typ + case _: + assert_never(position) return None def find_quote_position( - lines: list[str], max_wrap_lines: int, limit: int | None = None + lines: list[str], + max_wrap_lines: int, + limit: int | None = None, + position: Position = Position.End, ) -> int | None: """ - Return the (ending) line number of a quoting pattern. If a limit is given - and the limit is reached, the limit is returned. + Return the beginning or ending line number of a quoting pattern. + + Args: + lines: List of lines of text. + max_wrap_lines: Number of lines to join when checking for potentially wrapped + patterns. + limit: If line limit is given and reached without finding a pattern, + the limit is returned. + position: Whether to return the beginning or ending line number. 
""" for n in range(len(lines)): - result = find_pattern_on_line(lines, n, max_wrap_lines) + result = find_pattern_on_line(lines, n, max_wrap_lines, position) if result: return result[0] if limit is not None and n >= limit - 1: @@ -189,7 +217,7 @@ def find_unwrap_start( # Find a forward / reply start pattern - result = find_pattern_on_line(lines, n, max_wrap_lines) + result = find_pattern_on_line(lines, n, max_wrap_lines, Position.End) if result: end, typ = result return n, end, typ diff --git a/requirements_tests.txt b/requirements_tests.txt index 2342e6b..9d17770 100644 --- a/requirements_tests.txt +++ b/requirements_tests.txt @@ -1,2 +1,3 @@ lxml==5.2.2 pytest==8.2.2 +typing-extensions==4.12.2 diff --git a/setup.py b/setup.py index 03b7dfb..2446b3a 100644 --- a/setup.py +++ b/setup.py @@ -28,6 +28,7 @@ ], test_suite="tests", tests_require=["lxml"], + install_requires=["typing_extensions>=4.1"], platforms="any", classifiers=[ "Environment :: Web Environment", diff --git a/tests/test_internal.py b/tests/test_internal.py new file mode 100644 index 0000000..932444b --- /dev/null +++ b/tests/test_internal.py @@ -0,0 +1,135 @@ +import pytest + +from quotequail._internal import extract_headers, parse_reply + + +@pytest.mark.parametrize( + ("line", "expected"), + [ + # German + ( + "Am 24.02.2015 um 22:48 schrieb John Doe :", + { + "date": "24.02.2015 um 22:48", + "from": "John Doe ", + }, + ), + # English + ( + "On Monday, March 7, 2016 10:19 AM, John Doe wrote:", + { + "date": "Monday, March 7, 2016 10:19 AM", + "from": "John Doe ", + }, + ), + ( + "On Feb 22, 2015, at 9:19 PM, John Doe wrote:", + { + "date": "Feb 22, 2015, at 9:19 PM", + "from": "John Doe ", + }, + ), + ( + "On 2016-03-14, at 20:26, John Doe wrote:", + { + "date": "2016-03-14, at 20:26", + "from": "John Doe ", + }, + ), + ( + "On 8 o'clock, John Doe wrote:", + {"date": "8 o'clock", "from": "John Doe"}, + ), + # French + ( + "Le 6 janv. 2014 à 19:50, John Doe a écrit :", + { + "date": "6 janv. 
2014 \xe0 19:50", + "from": "John Doe ", + }, + ), + ( + "Le 02.10.2013 à 11:13, John Doe a écrit :", + { + "date": "02.10.2013 \xe0 11:13", + "from": "John Doe ", + }, + ), + # Spanish + ( + "El 11/07/2012 06:13 p.m., John Doe escribió:", + {"date": "11/07/2012 06:13 p.m.", "from": "John Doe"}, + ), + ( + "El 06/04/2010, a las 13:13, John Doe escribió:", + {"date": "06/04/2010, a las 13:13", "from": "John Doe"}, + ), + # Swedish + ( + "Den 24 februari 2015 22:48 skrev John Doe :", + { + "date": "24 februari 2015 22:48", + "from": "John Doe ", + }, + ), + # Brazilian Portuguese + ( + "Em qui, 24 de jan de 2019 às 14:31, John Doe escreveu:", + { + "date": "qui, 24 de jan de 2019 às 14:31", + "from": "John Doe ", + }, + ), + # Other + ( + "2009/5/12 John Doe ", + {"date": "2009/5/12", "from": "John Doe "}, + ), + ], +) +def test_parse_reply(line, expected): + assert parse_reply(line) == expected + + +def test_extract_headers(): + assert extract_headers([], 2) == ({}, 0) + assert extract_headers(["test"], 2) == ({}, 0) + assert extract_headers(["From: b", "To: c"], 2) == ( + {"from": "b", "to": "c"}, + 2, + ) + assert extract_headers(["From: b", "foo"], 2) == ({"from": "b foo"}, 2) + assert extract_headers(["From: b", "foo"], 1) == ({"from": "b"}, 1) + assert extract_headers(["From: b", "To: c", "", "other line"], 2) == ( + {"from": "b", "to": "c"}, + 2, + ) + assert extract_headers( + [ + "From: some very very very long name <", + "verylong@example.com>", + "Subject: this is a very very very very long", + "subject", + "", + "other line", + ], + 2, + ) == ( + { + "from": "some very very very long name ", + "subject": "this is a very very very very long subject", + }, + 4, + ) + assert extract_headers( + [ + "From: some very very very long name <", + "verylong@example.com>", + ], + 1, + ) == ( + { + "from": "some very very very long name <", + }, + 1, + ) diff --git a/tests/test_quote.py b/tests/test_quote.py new file mode 100644 index 0000000..e4a4514 --- /dev/null 
+++ b/tests/test_quote.py @@ -0,0 +1,179 @@ +import pytest + +from quotequail import quote + + +@pytest.mark.parametrize( + ("text", "expected", "expected_quote_intro_line"), + [ + # Reply patterns. + ( + """Hello world. + +On 2012-10-16 at 17:02 , Someone wrote: + +> Some quoted text +""", + [ + ( + True, + "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone wrote:", + ), + (False, "\n> Some quoted text\n"), + ], + [ + ( + True, + "Hello world.\n", + ), + ( + False, + "On 2012-10-16 at 17:02 , Someone wrote:\n\n> Some quoted text\n", + ), + ], + ), + ( + """Hello world. + +On 2012-10-16 at 17:02 , Someone < +someone@example.com> wrote: + +> Some quoted text +""", + [ + ( + True, + "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone <\nsomeone@example.com> wrote:", + ), + (False, "\n> Some quoted text\n"), + ], + [ + ( + True, + "Hello world.\n", + ), + ( + False, + "On 2012-10-16 at 17:02 , Someone <\nsomeone@example.com> wrote:\n\n> Some quoted text\n", + ), + ], + ), + ( + """Hello world. + +On 2012-10-16 at 17:02 , Someone +wrote: + +> Some quoted text +""", + [ + ( + True, + "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone \nwrote:", + ), + (False, "\n> Some quoted text\n"), + ], + [ + ( + True, + "Hello world.\n", + ), + ( + False, + "On 2012-10-16 at 17:02 , Someone \nwrote:\n\n> Some quoted text\n", + ), + ], + ), + # Forward patterns. + ( + """Hello world. + +Begin forwarded message: + +> From: Someone +> Subject: The email +> +> Some quoted text. +""", + [ + (True, "Hello world.\n\nBegin forwarded message:"), + ( + False, + "\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n", + ), + ], + [ + (True, "Hello world.\n"), + ( + False, + "Begin forwarded message:\n\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n", + ), + ], + ), + ( + """Hello world. + +---------- Forwarded message ---------- +From: Someone +Subject: The email + +Some quoted text. 
+""", + [ + ( + True, + "Hello world.\n\n---------- Forwarded message ----------", + ), + ( + False, + "From: Someone \nSubject: The email\n\nSome quoted text.\n", + ), + ], + [ + ( + True, + "Hello world.\n", + ), + ( + False, + "---------- Forwarded message ----------\nFrom: Someone \nSubject: The email\n\nSome quoted text.\n", + ), + ], + ), + ( + """Hello world. + +> Begin forwarded message: +> +> From: Someone +> Subject: The email +> +> Some quoted text. +""", + [ + (True, "Hello world.\n\n> Begin forwarded message:"), + ( + False, + ">\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n", + ), + ], + [ + (True, "Hello world.\n"), + ( + False, + "> Begin forwarded message:\n>\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n", + ), + ], + ), + ], +) +def test_quote(text, expected, expected_quote_intro_line): + assert quote(text) == expected + assert quote(text, quote_intro_line=True) == expected_quote_intro_line + + +def test_limit(): + assert quote("Lorem\nIpsum\nDolor\nSit\nAmet", limit=2) == [ + (True, "Lorem\nIpsum"), + (False, "Dolor\nSit\nAmet"), + ] diff --git a/tests/test_quote_html.py b/tests/test_quote_html.py new file mode 100644 index 0000000..0ec9f41 --- /dev/null +++ b/tests/test_quote_html.py @@ -0,0 +1,265 @@ +import pytest + +from quotequail import quote_html + + +@pytest.mark.parametrize( + ("html", "expected", "expected_quote_intro_line"), + [ + # Apple + ( + """Some text

some more text


On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:

Lorem ipsum dolor sit amet.


""", + [ + # Note that lxml removes Content-Type meta tags (see + # lxml.html.tostring include_meta_content_type flag) + ( + True, + """Some text

some more text


On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:
""", + ), + # Note we have an empty div stripped out here. + ( + False, + """

Lorem ipsum dolor sit amet.


""", + ), + ], + [ + ( + True, + 'Some text

some more text

', + ), + ( + False, + '
On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:

Lorem ipsum dolor sit amet.


', + ), + ], + ), + # Gmail (1) + ( + """

---------- Forwarded message ----------
From: Some One <someone@example.com> +



--
Some One
+
""", + [ + ( + True, + """

---------- Forwarded message ----------
""", + ), + ( + False, + """
From: Some One <someone@example.com> +



--
Some One
+
""", + ), + ], + [ + (True, '
'), + ( + False, + '
---------- Forwarded message ----------
From: Some One <someone@example.com>\n



--
Some One
\n
', + ), + ], + ), + # Gmail (2) + ( + """
looks good\xa0

On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
\r\n""", + [ + ( + True, + """
looks good\xa0

On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
""", + ), + ( + False, + """
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
""", + ), + ], + [ + ( + True, + '
looks good\xa0
', + ), + ( + False, + '
On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
', + ), + ], + ), + # Outlook + ( + """

Thanks,

 

 

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

 

Hey,

""", + [ + ( + True, + '

Thanks,

\xa0

\xa0

', + ), + ( + False, + '

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

\xa0

Hey,

', + ), + ], + [ + ( + True, + '

Thanks,

\xa0

\xa0

', + ), + ( + False, + '

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

\xa0

Hey,

', + ), + ], + ), + # Newline in "Am\r\n26. Mai" should not change the way we match. + ( + """\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

\r\n
\r\n
\r\nAm\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n\r\n""", + [ + ( + True, + '\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

\r\n
\r\n
\r\n

Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

', + ), + ( + False, + '\r\n\r\n\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n', + ), + ], + [ + ( + True, + '\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

', + ), + ( + False, + '\r\n\r\n\r\n

Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n', + ), + ], + ), + # No wrap tag. + ( + """On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
some stuff
""", + [ + ( + True, + "On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:", + ), + (False, "
some stuff
"), + ], + [ + ( + True, + "", + ), + ( + False, + "On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
some stuff
", + ), + ], + ), + # Images + ( + """
Well hello there Sir!!!


On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
Hi there \"*B-)\"*:P\"*:-~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\">
""", + [ + ( + True, + """
Well hello there Sir!!!


On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
""", + ), + ( + False, + """
Hi there\xa0\"*B-)\"*:P\"*:->~~
""", + ), + ], + [ + ( + True, + """
Well hello there Sir!!!

""", + ), + ( + False, + """
On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
Hi there\xa0\"*B-)\"*:P\"*:->~~
""", + ), + ], + ), + ], +) +def test_quote_html(html, expected, expected_quote_intro_line): + assert quote_html(html) == expected + assert quote_html(html, quote_intro_line=True) == expected_quote_intro_line + + +def test_no_quote(): + assert quote_html("""

One

Two

Three

""") == [ + (True, "

One

Two

Three

"), + ] + + +def test_limit(): + assert quote_html( + """

One

Two

Three

Four

""", limit=3 + ) == [ + (True, "

One

Two

Three

"), + (False, "

Four

"), + ] + + +def test_empty(): + assert quote_html("") == [ + (True, ""), + ] + + +def test_comment(): + assert quote_html("""""") == [ + (True, ""), + ] + + +def test_comment_2(): + assert quote_html("""AB""") == [ + (True, "AB"), + ] + + +def test_comment_3(): + assert quote_html( + """

Begin forwarded message:

""" + ) == [ + (True, "

Begin forwarded message:"), + (False, "
"), + ] + + +def test_prefix_tag(): + assert quote_html("""A
Begin forwarded message:B""") == [ + (True, "A
Begin forwarded message:B"), + ] + + +def test_prefix_tag_2(): + # We can't preserve the exact markup due to lxml's parsing here. + assert quote_html("""A
Begin forwarded message:B""") == [ + (True, "A
Begin forwarded message:B"), + ] + + +def test_encoding(): + # We assume everything is UTF-8 + assert quote_html(""" + + + + + + + +test ä + +""") == [ + ( + True, + """ + + + + + +test ä + +""", + ), + ] diff --git a/tests/test_quotequail.py b/tests/test_quotequail.py deleted file mode 100644 index 243de70..0000000 --- a/tests/test_quotequail.py +++ /dev/null @@ -1,1177 +0,0 @@ -import os -import unittest - -from quotequail import quote, quote_html, unwrap, unwrap_html - - -class FileMixin: - def read_file(self, name): - with open( - os.path.join(os.path.dirname(__file__), "files", name), "rb" - ) as f: - return f.read().decode("utf8") - - def assert_equal_to_file(self, string, name): - expected = self.read_file(name) - self.assertEqual(string, expected) - - -class QuoteTestCase(unittest.TestCase): - def test_quote_reply_1(self): - self.assertEqual( - quote( - """Hello world. - -On 2012-10-16 at 17:02 , Someone wrote: - -> Some quoted text -""" - ), - [ - ( - True, - "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone wrote:", - ), - (False, "\n> Some quoted text\n"), - ], - ) - - def test_quote_reply_2(self): - self.assertEqual( - quote( - """Hello world. - -On 2012-10-16 at 17:02 , Someone < -someone@example.com> wrote: - -> Some quoted text -""" - ), - [ - ( - True, - "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone <\nsomeone@example.com> wrote:", - ), - (False, "\n> Some quoted text\n"), - ], - ) - - def test_quote_reply_3(self): - self.assertEqual( - quote( - """Hello world. - -On 2012-10-16 at 17:02 , Someone -wrote: - -> Some quoted text -""" - ), - [ - ( - True, - "Hello world.\n\nOn 2012-10-16 at 17:02 , Someone \nwrote:", - ), - (False, "\n> Some quoted text\n"), - ], - ) - - def test_quote_forward_1(self): - self.assertEqual( - quote( - """Hello world. - -Begin forwarded message: - -> From: Someone -> Subject: The email -> -> Some quoted text. 
-""" - ), - [ - (True, "Hello world.\n\nBegin forwarded message:"), - ( - False, - "\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n", - ), - ], - ) - - def test_quote_forward_2(self): - self.assertEqual( - quote( - """Hello world. - ----------- Forwarded message ---------- -From: Someone -Subject: The email - -Some quoted text. -""" - ), - [ - ( - True, - "Hello world.\n\n---------- Forwarded message ----------", - ), - ( - False, - "From: Someone \nSubject: The email\n\nSome quoted text.\n", - ), - ], - ) - - def test_quote_forward_3(self): - self.assertEqual( - quote( - """Hello world. - -> Begin forwarded message: -> -> From: Someone -> Subject: The email -> -> Some quoted text. -""" - ), - [ - (True, "Hello world.\n\n> Begin forwarded message:"), - ( - False, - ">\n> From: Someone \n> Subject: The email\n>\n> Some quoted text.\n", - ), - ], - ) - - def test_limit(self): - self.assertEqual( - quote("Lorem\nIpsum\nDolor\nSit\nAmet", limit=2), - [(True, "Lorem\nIpsum"), (False, "Dolor\nSit\nAmet")], - ) - - -class HTMLQuoteTestCase(unittest.TestCase): - def test_apple(self): - self.assertEqual( - quote_html( - """Some text

some more text


On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:

Lorem ipsum dolor sit amet.


""" - ), - [ - # Note that lxml removes Content-Type meta tags (see - # lxml.html.tostring include_meta_content_type flag) - ( - True, - """Some text

some more text


On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:
""", - ), - # Note we have an empty div stripped out here. - ( - False, - """

Lorem ipsum dolor sit amet.


""", - ), - ], - ) - - def test_gmail(self): - self.assertEqual( - quote_html( - """

---------- Forwarded message ----------
From: Some One <someone@example.com> -



--
Some One
-
""" - ), - [ - ( - True, - """

---------- Forwarded message ----------
""", - ), - ( - False, - """
From: Some One <someone@example.com> -



--
Some One
-
""", - ), - ], - ) - - def test_gmail_2(self): - self.assertEqual( - quote_html( - """
looks good\xa0

On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
\r\n""" - ), - [ - ( - True, - """
looks good\xa0

On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
""", - ), - ( - False, - """
Hey Phil,\xa0

Sending you the report:\xa0


--
Cheers,
foo & example Team
\r\n
\r\n
""", - ), - ], - ) - - def test_outlook(self): - self.assertEqual( - quote_html( - """

Thanks,

 

 

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

 

Hey,

""" - ), - [ - ( - True, - '

Thanks,

\xa0

\xa0

', - ), - ( - False, - '

From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!

\xa0

Hey,

', - ), - ], - ) - - def test_no_wrap_tag(self): - self.assertEqual( - quote_html( - """On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
some stuff
""" - ), - [ - ( - True, - "On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:", - ), - (False, "
some stuff
"), - ], - ) - - def test_images(self): - self.assertEqual( - quote_html( - """
Well hello there Sir!!!


On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
Hi there \"*B-)\"*:P\"*:-~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\">
""" - ), - [ - ( - True, - """
Well hello there Sir!!!


On Dec 23, 2014, at 04:35 PM, Steve Wiseman <wiseman.steve@ymail.com> wrote:
""", - ), - ( - False, - """
Hi there\xa0\"*B-)\"*:P\"*:->~~
""", - ), - ], - ) - - def test_no_quote(self): - self.assertEqual( - quote_html("""

One

Two

Three

"""), - [ - (True, "

One

Two

Three

"), - ], - ) - - def test_limit(self): - self.assertEqual( - quote_html( - """

One

Two

Three

Four

""", limit=3 - ), - [ - (True, "

One

Two

Three

"), - (False, "

Four

"), - ], - ) - - def test_empty(self): - self.assertEqual( - quote_html(""), - [ - (True, ""), - ], - ) - - def test_comment(self): - self.assertEqual( - quote_html(""""""), - [ - (True, ""), - ], - ) - - def test_comment_2(self): - self.assertEqual( - quote_html("""AB"""), - [ - (True, "AB"), - ], - ) - - def test_comment_3(self): - self.assertEqual( - quote_html( - """

Begin forwarded message:

""" - ), - [ - (True, "

Begin forwarded message:"), - (False, "
"), - ], - ) - - def test_prefix_tag(self): - self.assertEqual( - quote_html("""A
Begin forwarded message:B"""), - [ - (True, "A
Begin forwarded message:B"), - ], - ) - - def test_prefix_tag_2(self): - # We can't preserve the exact markup due to lxml's parsing here. - self.assertEqual( - quote_html("""A
Begin forwarded message:B"""), - [ - (True, "A
Begin forwarded message:B"), - ], - ) - - def test_encoding(self): - # We assume everything is UTF-8 - self.assertEqual( - quote_html( - """ - - - - - - - -test ä - -""" - ), - [ - ( - True, - """ - - - - - -test ä - -""", - ), - ], - ) - - def test_newline(self): - # Newline in "Am\r\n26. Mai" should not change the way we match. - self.assertEqual( - quote_html( - """\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

\r\n
\r\n
\r\nAm\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n\r\n""" - ), - [ - ( - True, - '\r\n\r\n\r\n\r\n\r\n
\r\n
\r\n

Here is spam.
\r\nHam

\r\n
\r\n
\r\n

Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:

', - ), - ( - False, - '\r\n\r\n\r\n
Hey\r\nHam,

I like spam.
\r\n
\r\n
\r\n\r\n', - ), - ], - ) - - -class UnwrapTestCase(unittest.TestCase): - def test_gmail_forward(self): - # Gmail forward - self.assertEqual( - unwrap( - """Hello - ----------- Forwarded message ---------- -From: Someone -Date: Fri, Apr 26, 2013 at 8:13 PM -Subject: Weekend Spanish classes -To: recipient@example.com - -Spanish Classes -Learn Spanish -""" - ), - { - "text_top": "Hello", - "type": "forward", - "from": "Someone ", - "date": "Fri, Apr 26, 2013 at 8:13 PM", - "subject": "Weekend Spanish classes", - "to": "recipient@example.com", - "text": "Spanish Classes\nLearn Spanish", - }, - ) - - def test_apple_forward(self): - # Apple Mail (10.9 and earlier) forward - self.assertEqual( - unwrap( - """Hello - -Begin forwarded message: - -> From: "Some One" -> Date: 1. August 2011 23:28:15 GMT-07:00 -> To: "Other Person" -> Subject: AW: AW: Some subject -> -> Original text - -Text bottom -""" - ), - { - "text_top": "Hello", - "type": "forward", - "from": '"Some One" ', - "date": "1. August 2011 23:28:15 GMT-07:00", - "subject": "AW: AW: Some subject", - "to": '"Other Person" ', - "text": "Original text", - "text_bottom": "Text bottom", - }, - ) - - def test_apple_forward_2(self): - # Apple Mail (10.10) forward - self.assertEqual( - unwrap( - """Hello - -> Begin forwarded message: -> -> From: "Some One" -> Date: 1. August 2011 23:28:15 GMT-07:00 -> To: "Other Person" -> Subject: AW: AW: Some subject -> -> Original text - -Text bottom -""" - ), - { - "text_top": "Hello", - "type": "forward", - "from": '"Some One" ', - "date": "1. August 2011 23:28:15 GMT-07:00", - "subject": "AW: AW: Some subject", - "to": '"Other Person" ', - "text": "Original text", - "text_bottom": "Text bottom", - }, - ) - - def test_sparrow_forward(self): - # Sparrow forward - self.assertEqual( - unwrap( - """Hello - -Forwarded message: - -> From: Some One -> To: Other person -> Date: Thursday, March 7, 2013 7:09:41 PM -> Subject: Re: Syncing Up -> -> OHAI -> -> Great news! 
- -Text bottom -""" - ), - { - "text_top": "Hello", - "type": "forward", - "from": "Some One ", - "date": "Thursday, March 7, 2013 7:09:41 PM", - "subject": "Re: Syncing Up", - "to": "Other person ", - "text": "OHAI\n\nGreat news!", - "text_bottom": "Text bottom", - }, - ) - - def test_bold_headers(self): - # Forwrad with *bold* text - self.assertEqual( - unwrap( - """Hello - -Forwarded message: - -*From:* Some One -*To:* Other Person -*Date:* Wednesday, February 6, 2013 7:46:53 AM -*Subject:* Fwd: Hottest Startups - -This is interesting.""" - ), - { - "text_top": "Hello", - "type": "forward", - "from": "Some One ", - "date": "Wednesday, February 6, 2013 7:46:53 AM", - "subject": "Fwd: Hottest Startups", - "to": "Other Person ", - "text": "This is interesting.", - }, - ) - - def test_no_forward_text(self): - # No forwarding message text - self.assertEqual( - unwrap( - """Hello - -From: "Some One" -Date: 1. August 2011 23:28:15 GMT-07:00 -To: "Other Person" -Subject: AW: AW: Some subject - -Original text -""" - ), - { - "text_top": "Hello", - "type": "forward", - "from": '"Some One" ', - "date": "1. August 2011 23:28:15 GMT-07:00", - "subject": "AW: AW: Some subject", - "to": '"Other Person" ', - "text": "Original text", - }, - ) - - def test_no_forward_text_quoted(self): - # No forwarding message text - self.assertEqual( - unwrap( - """Hello - -> From: "Some One" -> Date: 1. August 2011 23:28:15 GMT-07:00 -> To: "Other Person" -> Subject: AW: AW: Some subject -> -> Original text -""" - ), - { - "text_top": "Hello", - "type": "forward", - "from": '"Some One" ', - "date": "1. August 2011 23:28:15 GMT-07:00", - "subject": "AW: AW: Some subject", - "to": '"Other Person" ', - "text": "Original text", - }, - ) - - def test_outlook_forward(self): - # Outlook? 
- self.assertEqual( - unwrap( - """-------- Original Message -------- -Subject: \tSome Newsletter -Date: \tFri, 19 Jun 2009 19:16:04 +0200 -From: \tfrom -Reply-To: \treply -To: \tto@example.com - -OHAI""" - ), - { - "type": "forward", - "from": "from ", - "reply-to": "reply ", - "date": "Fri, 19 Jun 2009 19:16:04 +0200", - "subject": "Some Newsletter", - "to": "to@example.com", - "text": "OHAI", - }, - ) - - def test_spacing(self): - # Some clients (Blackberry?) have weird whitespace rules - self.assertEqual( - unwrap( - """hello world - ------Original Message----- -From: "Some One" - -Date: Sat, 22 Mar 2008 12:16:06 -To: - - -Subject: Antw: FW: html - - -OHAI... -""" - ), - { - "text_top": "hello world", - "type": "forward", - "from": '"Some One" ', - "date": "Sat, 22 Mar 2008 12:16:06", - "subject": "Antw: FW: html", - "to": "", - "text": "OHAI...", - }, - ) - - def test_quote(self): - # Just a quote - self.assertEqual( - unwrap( - """hello world - -Hey: This is very important - -> Lorem ipsum -> dolor sit amet -> adipiscing elit. - --- -kthxbye -""" - ), - { - "type": "quote", - "text_top": "hello world\n\nHey: This is very important", - "text": "Lorem ipsum\ndolor sit amet\nadipiscing elit.", - "text_bottom": "--\nkthxbye", - }, - ) - - def test_no_message(self): - # No message - self.assertEqual( - unwrap( - """hello world - -Hey: This is very important - -> No quoted message (just one line). -""" - ), - None, - ) - - def test_forward_no_headers(self): - # No quote / headers in forwarded message - self.assertEqual( - unwrap( - """Begin forwarded message: -Hello -""" - ), - { - "type": "forward", - "text": "Hello", - }, - ) - - def test_confusing_email_signature(self): - self.assertEqual( - unwrap( - """Phone: 12345 -Fax: 67890 -Skype: foobar - ----------- Forwarded message ---------- -From: Someone -Subject: The email - -Email text. 
-""" - ), - { - "text_top": "Phone: 12345\nFax: 67890\nSkype: foobar", - "type": "forward", - "from": "Someone ", - "subject": "The email", - "text": "Email text.", - }, - ) - - def test_long_subject(self): - self.assertEqual( - unwrap( - """---------- Forwarded message ---------- -From: Someone -Subject: The email has a very long and confusing subject with spans over -multiple lines. -To: Destination - -Email text. -""" - ), - { - "type": "forward", - "from": "Someone ", - "to": "Destination ", - "subject": "The email has a very long and confusing subject with spans over multiple lines.", - "text": "Email text.", - }, - ) - - def test_reply_1(self): - data = unwrap( - """Hello world. - -On 2012-10-16 at 17:02 , Someone wrote: - -> Some quoted text -""" - ) - self.assertEqual( - data, - { - "type": "reply", - "date": "2012-10-16 at 17:02", - "from": "Someone ", - "text_top": "Hello world.", - "text": "Some quoted text", - }, - ) - - def test_reply_2(self): - data = unwrap( - """Hello world. 
- -On 2012-10-16 at 17:02 , Someone < -someone@example.com> wrote: - -> Some quoted text -""" - ) - self.assertEqual( - data, - { - "type": "reply", - "date": "2012-10-16 at 17:02", - "from": "Someone ", - "text_top": "Hello world.", - "text": "Some quoted text", - }, - ) - - def test_french(self): - self.assertEqual( - unwrap( - """ -De : Someone -Répondre à : Reply -Date : Wednesday, 17 September 2014 4:24 pm -À : "Someone Else" -Objet : Re: test subject - -Hello, thanks for your reply - """ - ), - { - "type": "forward", - "date": "Wednesday, 17 September 2014 4:24 pm", - "from": "Someone ", - "reply-to": "Reply ", - "to": '"Someone Else" ', - "subject": "Re: test subject", - "text": "Hello, thanks for your reply", - }, - ) - - def test_forward_french_apple_mail(self): - self.assertEqual( - unwrap( - """ -Text before - -Début du message réexpédié : - -De: "Foo Bar" -Date: 14 novembre 2015 15:14:53 UTC+1 -À: "'Ham Spam'" -Objet: RE: The subject - -Text after -""" - ), - { - "date": "14 novembre 2015 15:14:53 UTC+1", - "from": '"Foo Bar" ', - "subject": "RE: The subject", - "text": "Text after", - "text_top": "Text before", - "to": "\"'Ham Spam'\" ", - "type": "forward", - }, - ) - - def test_forward_french_thunderbird(self): - self.assertEqual( - unwrap( - """ -Text before - --------- Message transféré -------- -Sujet : Re: Some subject -Date : Wed, 11 Nov 2015 12:31:25 +0100 -De : Foo Bar -Pour : Ham Spam - -Text after -""" - ), - { - "date": "Wed, 11 Nov 2015 12:31:25 +0100", - "from": "Foo Bar ", - "subject": "Re: Some subject", - "text": "Text after", - "text_top": "Text before", - "to": "Ham Spam ", - "type": "forward", - }, - ) - - def test_gmail_forward_swedish(self): - # Gmail forward - self.assertEqual( - unwrap( - """Hello - ----------- Vidarebefordrat meddelande ---------- -Från: Someone -Datum: 26 april 2013 20:13 -Ämne: Weekend Spanish classes -Till: recipient@example.com - -Spanish Classes -Learn Spanish -""" - ), - { - "text_top": "Hello", - "type": 
"forward", - "from": "Someone ", - "date": "26 april 2013 20:13", - "subject": "Weekend Spanish classes", - "to": "recipient@example.com", - "text": "Spanish Classes\nLearn Spanish", - }, - ) - - -class HTMLUnwrapTestCase(FileMixin, unittest.TestCase): - def test_simple_forward(self): - html = "Begin forwarded message:
\n
\nFrom: someone@example.com
\nTo: anyone@example.com
\nSubject: You won
\n" - self.assertEqual( - unwrap_html(html), - { - "type": "forward", - "from": "someone@example.com", - "to": "anyone@example.com", - "subject": "You won", - }, - ) - - def test_apple_forward(self): - html = 'test

blah


Begin forwarded message:

From: Foo Bar <foo@bar.example>
Subject: The Subject
Date: March 24, 2016 at 20:16:25 GMT+1
To: John Doe <john@doe.example>

Text of the original email
' - - self.assertEqual( - unwrap_html(html), - { - "type": "forward", - "subject": "The Subject", - "date": "March 24, 2016 at 20:16:25 GMT+1", - "from": "Foo Bar ", - "to": "John Doe ", - "html_top": 'test

blah
', - "html": '
Text of the original email
', - }, - ) - - def test_gmail_forward(self): - html = '
test

blah

---------- Forwarded message ----------
From: Foo Bar <foo@bar.example>
Date: Thu, Mar 24, 2016 at 5:17 PM
Subject: The Subject
To: John Doe <john@doe.example>


Some text



' - - self.assertEqual( - unwrap_html(html), - { - "type": "forward", - "subject": "The Subject", - "date": "Thu, Mar 24, 2016 at 5:17 PM", - "from": "Foo Bar ", - "to": "John Doe ", - "html_top": '
test

blah
', - "html": '
Some text
', - }, - ) - - def test_apple_reply(self): - html = 'Foo

Bar

On 2016-03-25, at 23:01, John Doe <john@doe.example> wrote:

Some important email

' - - self.assertEqual( - unwrap_html(html), - { - "type": "reply", - "from": "John Doe ", - "date": "2016-03-25, at 23:01", - "html": '
Some important email
', - "html_top": 'Foo

Bar
', - }, - ) - - def test_gmail_reply(self): - html = """
foo

bar

On Wed, Mar 16, 2016 at 12:49 AM, Foo Bar <foo@bar.example> wrote:
Hi,
-
This is the reply
-
-Thanks a lot!
-Foo
-
-



--
John Doe
Senior Director
Some Company
-
-""" - - self.assertEqual( - unwrap_html(html), - { - "type": "reply", - "from": "Foo Bar ", - "date": "Wed, Mar 16, 2016 at 12:49 AM", - "html_top": '
foo

bar
', - "html": '
Hi,
\n
This is the reply
\n
\nThanks a lot!
\nFoo
', - "html_bottom": '
--
John Doe
Senior Director
Some Company
\n
\n', - }, - ) - - def test_outlook_forward(self): - data = self.read_file("outlook_forward.html") - result = unwrap_html(data) - self.assertEqual(result["type"], "forward") - self.assertEqual(result["from"], "John Doe") - self.assertEqual(result["to"], "Foo Bar (foo@bar.example)") - self.assertEqual(result["date"], "Wednesday, July 09, 2014 10:27 AM") - self.assertEqual(result["subject"], "The subject!") - self.assert_equal_to_file( - result["html"], "outlook_forward_unwrapped.html" - ) - self.assert_equal_to_file( - result["html_top"], "outlook_forward_unwrapped_top.html" - ) - self.assertNotIn("html_bottom", result) - - def test_thunderbird_forward(self): - data = self.read_file("thunderbird_forward.html") - result = unwrap_html(data) - self.assertEqual(result["type"], "forward") - self.assertEqual(result["from"], "John Doe ") - self.assertEqual(result["to"], "Foo Bar ") - self.assertEqual( - result["date"], "Tue, 3 May 2016 14:54:27 +0200 (CEST)" - ) - self.assertEqual(result["subject"], "Re: Example subject") - self.assertNotIn("html_top", result) - self.assert_equal_to_file( - result["html"], "thunderbird_forward_unwrapped.html" - ) - self.assertNotIn("html_bottom", result) - - def test_mailru_forward(self): - data = self.read_file("mailru_forward.html") - result = unwrap_html(data) - self.assertEqual(result["type"], "forward") - self.assertEqual( - result["from"], "Иван Иванов " - ) - self.assertEqual(result["to"], "Петр Петров ") - self.assertEqual(result["date"], "Среда, 14 июня 2017, 15:19 +03:00") - self.assertEqual(result["subject"], "Тестовая тема") - self.assertNotIn("html_top", result) - self.assert_equal_to_file( - result["html"], "mailru_forward_unwrapped.html" - ) - self.assertNotIn("html_bottom", result) - - -class InternalTestCase(unittest.TestCase): - def test_parse_reply(self): - from quotequail._internal import parse_reply - - data = parse_reply( - "Am 24.02.2015 um 22:48 schrieb John Doe :" - ) - self.assertEqual( - data, - { - "date": 
"24.02.2015 um 22:48", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "On Monday, March 7, 2016 10:19 AM, John Doe wrote:" - ) - self.assertEqual( - data, - { - "date": "Monday, March 7, 2016 10:19 AM", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "On Feb 22, 2015, at 9:19 PM, John Doe wrote:" - ) - self.assertEqual( - data, - { - "date": "Feb 22, 2015, at 9:19 PM", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "On 2016-03-14, at 20:26, John Doe wrote:" - ) - self.assertEqual( - data, - { - "date": "2016-03-14, at 20:26", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "Le 6 janv. 2014 à 19:50, John Doe a écrit :" - ) - self.assertEqual( - data, - { - "date": "6 janv. 2014 \xe0 19:50", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "Le 02.10.2013 à 11:13, John Doe a écrit :" - ) - self.assertEqual( - data, - { - "date": "02.10.2013 \xe0 11:13", - "from": "John Doe ", - }, - ) - - data = parse_reply("El 11/07/2012 06:13 p.m., John Doe escribió:") - self.assertEqual( - data, {"date": "11/07/2012 06:13 p.m.", "from": "John Doe"} - ) - - data = parse_reply("El 06/04/2010, a las 13:13, John Doe escribió:") - self.assertEqual( - data, {"date": "06/04/2010, a las 13:13", "from": "John Doe"} - ) - - data = parse_reply("2009/5/12 John Doe ") - self.assertEqual( - data, {"date": "2009/5/12", "from": "John Doe "} - ) - - data = parse_reply("On 8 o'clock, John Doe wrote:") - self.assertEqual(data, {"date": "8 o'clock", "from": "John Doe"}) - - # Swedish - data = parse_reply( - "Den 24 februari 2015 22:48 skrev John Doe :" - ) - self.assertEqual( - data, - { - "date": "24 februari 2015 22:48", - "from": "John Doe ", - }, - ) - - # Brazillian portuguese - data = parse_reply( - "Em qui, 24 de jan de 2019 às 14:31, John Doe escreveu:" - ) - self.assertEqual( - data, - { - "date": "qui, 24 de jan de 2019 às 14:31", - "from": "John Doe ", - }, - ) - - -class InternalHTMLTestCase(unittest.TestCase): - def 
test_extract_headers(self): - from quotequail._internal import extract_headers - - self.assertEqual(extract_headers([], 2), ({}, 0)) - self.assertEqual(extract_headers(["test"], 2), ({}, 0)) - self.assertEqual( - extract_headers(["From: b", "To: c"], 2), - ({"from": "b", "to": "c"}, 2), - ) - self.assertEqual( - extract_headers(["From: b", "foo"], 2), ({"from": "b foo"}, 2) - ) - self.assertEqual( - extract_headers(["From: b", "foo"], 1), ({"from": "b"}, 1) - ) - self.assertEqual( - extract_headers(["From: b", "To: c", "", "other line"], 2), - ({"from": "b", "to": "c"}, 2), - ) - self.assertEqual( - extract_headers( - [ - "From: some very very very long name <", - "verylong@example.com>", - "Subject: this is a very very very very long", - "subject", - "", - "other line", - ], - 2, - ), - ( - { - "from": "some very very very long name ", - "subject": "this is a very very very very long subject", - }, - 4, - ), - ) - self.assertEqual( - extract_headers( - [ - "From: some very very very long name <", - "verylong@example.com>", - ], - 1, - ), - ( - { - "from": "some very very very long name <", - }, - 1, - ), - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_unwrap.py b/tests/test_unwrap.py new file mode 100644 index 0000000..a785f44 --- /dev/null +++ b/tests/test_unwrap.py @@ -0,0 +1,427 @@ +import pytest + +from quotequail import unwrap + + +@pytest.mark.parametrize( + ("text", "expected"), + [ + # Gmail forward + ( + """Hello + +---------- Forwarded message ---------- +From: Someone +Date: Fri, Apr 26, 2013 at 8:13 PM +Subject: Weekend Spanish classes +To: recipient@example.com + +Spanish Classes +Learn Spanish +""", + { + "text_top": "Hello", + "type": "forward", + "from": "Someone ", + "date": "Fri, Apr 26, 2013 at 8:13 PM", + "subject": "Weekend Spanish classes", + "to": "recipient@example.com", + "text": "Spanish Classes\nLearn Spanish", + }, + ), + # Apple Mail (10.9 and earlier) forward + ( + """Hello + +Begin forwarded message: + 
+> From: "Some One" +> Date: 1. August 2011 23:28:15 GMT-07:00 +> To: "Other Person" +> Subject: AW: AW: Some subject +> +> Original text + +Text bottom +""", + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + "text_bottom": "Text bottom", + }, + ), + ( + # Apple Mail (10.10) forward + """Hello + +> Begin forwarded message: +> +> From: "Some One" +> Date: 1. August 2011 23:28:15 GMT-07:00 +> To: "Other Person" +> Subject: AW: AW: Some subject +> +> Original text + +Text bottom +""", + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + "text_bottom": "Text bottom", + }, + ), + # Sparrow forward + ( + """Hello + +Forwarded message: + +> From: Some One +> To: Other person +> Date: Thursday, March 7, 2013 7:09:41 PM +> Subject: Re: Syncing Up +> +> OHAI +> +> Great news! + +Text bottom +""", + { + "text_top": "Hello", + "type": "forward", + "from": "Some One ", + "date": "Thursday, March 7, 2013 7:09:41 PM", + "subject": "Re: Syncing Up", + "to": "Other person ", + "text": "OHAI\n\nGreat news!", + "text_bottom": "Text bottom", + }, + ), + # Forward with *bold* text + ( + """Hello + +Forwarded message: + +*From:* Some One +*To:* Other Person +*Date:* Wednesday, February 6, 2013 7:46:53 AM +*Subject:* Fwd: Hottest Startups + +This is interesting.""", + { + "text_top": "Hello", + "type": "forward", + "from": "Some One ", + "date": "Wednesday, February 6, 2013 7:46:53 AM", + "subject": "Fwd: Hottest Startups", + "to": "Other Person ", + "text": "This is interesting.", + }, + ), + # No forwarding message text + ( + """Hello + +From: "Some One" +Date: 1. 
August 2011 23:28:15 GMT-07:00 +To: "Other Person" +Subject: AW: AW: Some subject + +Original text +""", + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + }, + ), + # No forwarding message text (quoted) + ( + """Hello + +> From: "Some One" +> Date: 1. August 2011 23:28:15 GMT-07:00 +> To: "Other Person" +> Subject: AW: AW: Some subject +> +> Original text +""", + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + }, + ), + # Outlook + ( + """-------- Original Message -------- +Subject: \tSome Newsletter +Date: \tFri, 19 Jun 2009 19:16:04 +0200 +From: \tfrom +Reply-To: \treply +To: \tto@example.com + +OHAI""", + { + "type": "forward", + "from": "from ", + "reply-to": "reply ", + "date": "Fri, 19 Jun 2009 19:16:04 +0200", + "subject": "Some Newsletter", + "to": "to@example.com", + "text": "OHAI", + }, + ), + # Some clients (Blackberry?) have weird whitespace rules + ( + """hello world + +-----Original Message----- +From: "Some One" + +Date: Sat, 22 Mar 2008 12:16:06 +To: + + +Subject: Antw: FW: html + + +OHAI... +""", + { + "text_top": "hello world", + "type": "forward", + "from": '"Some One" ', + "date": "Sat, 22 Mar 2008 12:16:06", + "subject": "Antw: FW: html", + "to": "", + "text": "OHAI...", + }, + ), + # Just a quote + ( + """hello world + +Hey: This is very important + +> Lorem ipsum +> dolor sit amet +> adipiscing elit. + +-- +kthxbye +""", + { + "type": "quote", + "text_top": "hello world\n\nHey: This is very important", + "text": "Lorem ipsum\ndolor sit amet\nadipiscing elit.", + "text_bottom": "--\nkthxbye", + }, + ), + # No message + ( + """hello world + +Hey: This is very important + +> No quoted message (just one line). 
+""", + None, + ), + # No quote / headers in forwarded message + ( + """Begin forwarded message: +Hello +""", + { + "type": "forward", + "text": "Hello", + }, + ), + # Confusing email signature + ( + """Phone: 12345 +Fax: 67890 +Skype: foobar + +---------- Forwarded message ---------- +From: Someone +Subject: The email + +Email text. +""", + { + "text_top": "Phone: 12345\nFax: 67890\nSkype: foobar", + "type": "forward", + "from": "Someone ", + "subject": "The email", + "text": "Email text.", + }, + ), + # Long subject + ( + """---------- Forwarded message ---------- +From: Someone +Subject: The email has a very long and confusing subject with spans over +multiple lines. +To: Destination + +Email text. +""", + { + "type": "forward", + "from": "Someone ", + "to": "Destination ", + "subject": "The email has a very long and confusing subject with spans over multiple lines.", + "text": "Email text.", + }, + ), + # Reply + ( + """Hello world. + +On 2012-10-16 at 17:02 , Someone wrote: + +> Some quoted text +""", + { + "type": "reply", + "date": "2012-10-16 at 17:02", + "from": "Someone ", + "text_top": "Hello world.", + "text": "Some quoted text", + }, + ), + # Reply with line break + ( + """Hello world. 
+ +On 2012-10-16 at 17:02 , Someone < +someone@example.com> wrote: + +> Some quoted text +""", + { + "type": "reply", + "date": "2012-10-16 at 17:02", + "from": "Someone ", + "text_top": "Hello world.", + "text": "Some quoted text", + }, + ), + # French email + ( + """ +De : Someone +Répondre à : Reply +Date : Wednesday, 17 September 2014 4:24 pm +À : "Someone Else" +Objet : Re: test subject + +Hello, thanks for your reply + """, + { + "type": "forward", + "date": "Wednesday, 17 September 2014 4:24 pm", + "from": "Someone ", + "reply-to": "Reply ", + "to": '"Someone Else" ', + "subject": "Re: test subject", + "text": "Hello, thanks for your reply", + }, + ), + # Forwarded French Apple Mail + ( + """ +Text before + +Début du message réexpédié : + +De: "Foo Bar" +Date: 14 novembre 2015 15:14:53 UTC+1 +À: "'Ham Spam'" +Objet: RE: The subject + +Text after +""", + { + "date": "14 novembre 2015 15:14:53 UTC+1", + "from": '"Foo Bar" ', + "subject": "RE: The subject", + "text": "Text after", + "text_top": "Text before", + "to": "\"'Ham Spam'\" ", + "type": "forward", + }, + ), + # Forwarded French Thunderbird + ( + """ +Text before + +-------- Message transféré -------- +Sujet : Re: Some subject +Date : Wed, 11 Nov 2015 12:31:25 +0100 +De : Foo Bar +Pour : Ham Spam + +Text after +""", + { + "date": "Wed, 11 Nov 2015 12:31:25 +0100", + "from": "Foo Bar ", + "subject": "Re: Some subject", + "text": "Text after", + "text_top": "Text before", + "to": "Ham Spam ", + "type": "forward", + }, + ), + # Forwarded Gmail Swedish + ( + """Hello + +---------- Vidarebefordrat meddelande ---------- +Från: Someone +Datum: 26 april 2013 20:13 +Ämne: Weekend Spanish classes +Till: recipient@example.com + +Spanish Classes +Learn Spanish +""", + { + "text_top": "Hello", + "type": "forward", + "from": "Someone ", + "date": "26 april 2013 20:13", + "subject": "Weekend Spanish classes", + "to": "recipient@example.com", + "text": "Spanish Classes\nLearn Spanish", + }, + ), + ], +) +def 
test_unwrap(text, expected): + assert unwrap(text) == expected