On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:
Lorem ipsum dolor sit amet.
On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:Lorem ipsum dolor sit amet.
Hey Phil,\xa0\r\nSending you the report:\xa0--\r\n
Hey Phil,\xa0\r\nSending you the report:\xa0--\r\n
Hey Phil,\xa0\r\nSending you the report:\xa0--\r\n
Thanks,
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
Hey,
Thanks,
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
Hey,
Thanks,
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
Hey,
Here is spam.
\r\nHam
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
\r\nHey\r\nHam,\r\n
I like spam.
Here is spam.
\r\nHam
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
Hey\r\nHam,\r\n
I like spam.
Here is spam.
\r\nHam
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
\r\nHey\r\nHam,\r\n
I like spam.
some stuff""", + [ + ( + True, + "On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:", + ), + (False, "
some stuff"), + ], + [ + ( + True, + "", + ), + ( + False, + "On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:
some stuff", + ), + ], + ), + # Images + ( + """
Hi there ~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\">
Hi there\xa0
Hi there\xa0
One
Two
Three
""") == [ + (True, "One
Two
Three
"), + ] + + +def test_limit(): + assert quote_html( + """One
Two
Three
Four
""", limit=3 + ) == [ + (True, "One
Two
Three
"), + (False, "Four
"), + ] + + +def test_empty(): + assert quote_html("") == [ + (True, ""), + ] + + +def test_comment(): + assert quote_html("""""") == [ + (True, ""), + ] + + +def test_comment_2(): + assert quote_html("""AB""") == [ + (True, "AB"), + ] + + +def test_comment_3(): + assert quote_html( + """On Nov 12, 2014, at 11:07 PM, Some One <someone@example.com> wrote:
Lorem ipsum dolor sit amet.
Hey Phil,\xa0\r\nSending you the report:\xa0--\r\n
Hey Phil,\xa0\r\nSending you the report:\xa0--\r\n
Thanks,
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
Hey,
Thanks,
From: John Doe [mailto:john@example.com]
Sent: Tuesday, December 30, 2014 5:31 PM
To: recipient@example.com
Subject: Excited to have you on board!
Hey,
some stuff""" - ), - [ - ( - True, - "On Thu, Dec 18, 2014 at 10:02 AM, foo <foo@example.com> wrote:", - ), - (False, "
some stuff"), - ], - ) - - def test_images(self): - self.assertEqual( - quote_html( - """
Hi there ~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\">
Hi there\xa0
One
Two
Three
"""), - [ - (True, "One
Two
Three
"), - ], - ) - - def test_limit(self): - self.assertEqual( - quote_html( - """One
Two
Three
Four
""", limit=3 - ), - [ - (True, "One
Two
Three
"), - (False, "Four
"), - ], - ) - - def test_empty(self): - self.assertEqual( - quote_html(""), - [ - (True, ""), - ], - ) - - def test_comment(self): - self.assertEqual( - quote_html(""""""), - [ - (True, ""), - ], - ) - - def test_comment_2(self): - self.assertEqual( - quote_html("""AB"""), - [ - (True, "AB"), - ], - ) - - def test_comment_3(self): - self.assertEqual( - quote_html( - """Here is spam.
\r\nHam
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
\r\nHey\r\nHam,\r\n
I like spam.
Here is spam.
\r\nHam
Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <spam@example.com>:
Hey\r\nHam,\r\n
I like spam.
Begin forwarded message:From: Foo Bar <foo@bar.example>Subject: The SubjectDate: March 24, 2016 at 20:16:25 GMT+1To: John Doe <john@doe.example>Text of the original email' - - self.assertEqual( - unwrap_html(html), - { - "type": "forward", - "subject": "The Subject", - "date": "March 24, 2016 at 20:16:25 GMT+1", - "from": "Foo Bar", - "to": "John Doe ", - "html_top": 'test blah', - "html": '', - }, - ) - - def test_gmail_forward(self): - html = 'Text of the original emailtest' - - self.assertEqual( - unwrap_html(html), - { - "type": "forward", - "subject": "The Subject", - "date": "Thu, Mar 24, 2016 at 5:17 PM", - "from": "Foo Barblah---------- Forwarded message ----------
From: Foo Bar <foo@bar.example>
Date: Thu, Mar 24, 2016 at 5:17 PM
Subject: The Subject
To: John Doe <john@doe.example>Some text", - "to": "John Doe ", - "html_top": ' test', - "html": 'blah', - }, - ) - - def test_apple_reply(self): - html = 'FooSome textBar' - - self.assertEqual( - unwrap_html(html), - { - "type": "reply", - "from": "John DoeOn 2016-03-25, at 23:01, John Doe <john@doe.example> wrote:Some important email", - "date": "2016-03-25, at 23:01", - "html": ' ', - "html_top": 'FooSome important emailBar', - }, - ) - - def test_gmail_reply(self): - html = """foobar-""" - - self.assertEqual( - unwrap_html(html), - { - "type": "reply", - "from": "Foo BarOn Wed, Mar 16, 2016 at 12:49 AM, Foo Bar <foo@bar.example> wrote:Hi,
-
This is the reply
-
-Thanks a lot!
-Foo
-
----John DoeSenior DirectorSome Company", - "date": "Wed, Mar 16, 2016 at 12:49 AM", - "html_top": ' foo', - "html": 'bar', - "html_bottom": 'Hi,
\n
This is the reply
\n
\nThanks a lot!
\nFoo--\n', - }, - ) - - def test_outlook_forward(self): - data = self.read_file("outlook_forward.html") - result = unwrap_html(data) - self.assertEqual(result["type"], "forward") - self.assertEqual(result["from"], "John Doe") - self.assertEqual(result["to"], "Foo Bar (foo@bar.example)") - self.assertEqual(result["date"], "Wednesday, July 09, 2014 10:27 AM") - self.assertEqual(result["subject"], "The subject!") - self.assert_equal_to_file( - result["html"], "outlook_forward_unwrapped.html" - ) - self.assert_equal_to_file( - result["html_top"], "outlook_forward_unwrapped_top.html" - ) - self.assertNotIn("html_bottom", result) - - def test_thunderbird_forward(self): - data = self.read_file("thunderbird_forward.html") - result = unwrap_html(data) - self.assertEqual(result["type"], "forward") - self.assertEqual(result["from"], "John Doe\nJohn DoeSenior DirectorSome Company") - self.assertEqual(result["to"], "Foo Bar ") - self.assertEqual( - result["date"], "Tue, 3 May 2016 14:54:27 +0200 (CEST)" - ) - self.assertEqual(result["subject"], "Re: Example subject") - self.assertNotIn("html_top", result) - self.assert_equal_to_file( - result["html"], "thunderbird_forward_unwrapped.html" - ) - self.assertNotIn("html_bottom", result) - - def test_mailru_forward(self): - data = self.read_file("mailru_forward.html") - result = unwrap_html(data) - self.assertEqual(result["type"], "forward") - self.assertEqual( - result["from"], "Иван Иванов " - ) - self.assertEqual(result["to"], "Петр Петров ") - self.assertEqual(result["date"], "Среда, 14 июня 2017, 15:19 +03:00") - self.assertEqual(result["subject"], "Тестовая тема") - self.assertNotIn("html_top", result) - self.assert_equal_to_file( - result["html"], "mailru_forward_unwrapped.html" - ) - self.assertNotIn("html_bottom", result) - - -class InternalTestCase(unittest.TestCase): - def test_parse_reply(self): - from quotequail._internal import parse_reply - - data = parse_reply( - "Am 24.02.2015 um 22:48 schrieb John Doe :" - ) - self.assertEqual( - data, - { - "date": "24.02.2015 um 22:48", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "On Monday, March 7, 2016 10:19 AM, John Doe wrote:" - ) - self.assertEqual( - data, - { - "date": "Monday, March 7, 2016 10:19 AM", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "On Feb 22, 2015, at 9:19 PM, John Doe wrote:" - ) - self.assertEqual( - data, - { - "date": "Feb 22, 2015, at 9:19 PM", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "On 2016-03-14, at 20:26, John Doe wrote:" - ) - self.assertEqual( - data, - { - "date": "2016-03-14, at 20:26", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "Le 6 janv. 2014 à 19:50, John Doe a écrit :" - ) - self.assertEqual( - data, - { - "date": "6 janv. 2014 \xe0 19:50", - "from": "John Doe ", - }, - ) - - data = parse_reply( - "Le 02.10.2013 à 11:13, John Doe a écrit :" - ) - self.assertEqual( - data, - { - "date": "02.10.2013 \xe0 11:13", - "from": "John Doe ", - }, - ) - - data = parse_reply("El 11/07/2012 06:13 p.m., John Doe escribió:") - self.assertEqual( - data, {"date": "11/07/2012 06:13 p.m.", "from": "John Doe"} - ) - - data = parse_reply("El 06/04/2010, a las 13:13, John Doe escribió:") - self.assertEqual( - data, {"date": "06/04/2010, a las 13:13", "from": "John Doe"} - ) - - data = parse_reply("2009/5/12 John Doe ") - self.assertEqual( - data, {"date": "2009/5/12", "from": "John Doe "} - ) - - data = parse_reply("On 8 o'clock, John Doe wrote:") - self.assertEqual(data, {"date": "8 o'clock", "from": "John Doe"}) - - # Swedish - data = parse_reply( - "Den 24 februari 2015 22:48 skrev John Doe :" - ) - self.assertEqual( - data, - { - "date": "24 februari 2015 22:48", - "from": "John Doe ", - }, - ) - - # Brazillian portuguese - data = parse_reply( - "Em qui, 24 de jan de 2019 às 14:31, John Doe escreveu:" - ) - self.assertEqual( - data, - { - "date": "qui, 24 de jan de 2019 às 14:31", - "from": "John Doe ", - }, - ) - - -class InternalHTMLTestCase(unittest.TestCase): - def test_extract_headers(self): - from quotequail._internal import extract_headers - - self.assertEqual(extract_headers([], 2), ({}, 0)) - self.assertEqual(extract_headers(["test"], 2), ({}, 0)) - self.assertEqual( - extract_headers(["From: b", "To: c"], 2), - ({"from": "b", "to": "c"}, 2), - ) - self.assertEqual( - extract_headers(["From: b", "foo"], 2), ({"from": "b foo"}, 2) - ) - self.assertEqual( - extract_headers(["From: b", "foo"], 1), ({"from": "b"}, 1) - ) - self.assertEqual( - extract_headers(["From: b", "To: c", "", "other line"], 2), - ({"from": "b", "to": "c"}, 2), - ) - self.assertEqual( - extract_headers( - [ - "From: some very very very long name <", - "verylong@example.com>", - "Subject: this is a very very very very long", - "subject", - "", - "other line", - ], - 2, - ), - ( - { - "from": "some very very very long name ", - "subject": "this is a very very very very long subject", - }, - 4, - ), - ) - self.assertEqual( - extract_headers( - [ - "From: some very very very long name <", - "verylong@example.com>", - ], - 1, - ), - ( - { - "from": "some very very very long name <", - }, - 1, - ), - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_unwrap.py b/tests/test_unwrap.py new file mode 100644 index 0000000..a785f44 --- /dev/null +++ b/tests/test_unwrap.py @@ -0,0 +1,427 @@ +import pytest + +from quotequail import unwrap + + +@pytest.mark.parametrize( + ("text", "expected"), + [ + # Gmail forward + ( + """Hello + +---------- Forwarded message ---------- +From: Someone +Date: Fri, Apr 26, 2013 at 8:13 PM +Subject: Weekend Spanish classes +To: recipient@example.com + +Spanish Classes +Learn Spanish +""", + { + "text_top": "Hello", + "type": "forward", + "from": "Someone ", + "date": "Fri, Apr 26, 2013 at 8:13 PM", + "subject": "Weekend Spanish classes", + "to": "recipient@example.com", + "text": "Spanish Classes\nLearn Spanish", + }, + ), + # Apple Mail (10.9 and earlier) forward + ( + """Hello + +Begin forwarded message: + +> From: "Some One" +> Date: 1. August 2011 23:28:15 GMT-07:00 +> To: "Other Person" +> Subject: AW: AW: Some subject +> +> Original text + +Text bottom +""", + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + "text_bottom": "Text bottom", + }, + ), + ( + # Apple Mail (10.10) forward + """Hello + +> Begin forwarded message: +> +> From: "Some One" +> Date: 1. August 2011 23:28:15 GMT-07:00 +> To: "Other Person" +> Subject: AW: AW: Some subject +> +> Original text + +Text bottom +""", + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + "text_bottom": "Text bottom", + }, + ), + # Sparrow forward + ( + """Hello + +Forwarded message: + +> From: Some One +> To: Other person +> Date: Thursday, March 7, 2013 7:09:41 PM +> Subject: Re: Syncing Up +> +> OHAI +> +> Great news! + +Text bottom +""", + { + "text_top": "Hello", + "type": "forward", + "from": "Some One ", + "date": "Thursday, March 7, 2013 7:09:41 PM", + "subject": "Re: Syncing Up", + "to": "Other person ", + "text": "OHAI\n\nGreat news!", + "text_bottom": "Text bottom", + }, + ), + # Forward with *bold* text + ( + """Hello + +Forwarded message: + +*From:* Some One +*To:* Other Person +*Date:* Wednesday, February 6, 2013 7:46:53 AM +*Subject:* Fwd: Hottest Startups + +This is interesting.""", + { + "text_top": "Hello", + "type": "forward", + "from": "Some One ", + "date": "Wednesday, February 6, 2013 7:46:53 AM", + "subject": "Fwd: Hottest Startups", + "to": "Other Person ", + "text": "This is interesting.", + }, + ), + # No forwarding message text + ( + """Hello + +From: "Some One" +Date: 1. August 2011 23:28:15 GMT-07:00 +To: "Other Person" +Subject: AW: AW: Some subject + +Original text +""", + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + }, + ), + # No forwarding message text (quoted) + ( + """Hello + +> From: "Some One" +> Date: 1. August 2011 23:28:15 GMT-07:00 +> To: "Other Person" +> Subject: AW: AW: Some subject +> +> Original text +""", + { + "text_top": "Hello", + "type": "forward", + "from": '"Some One" ', + "date": "1. August 2011 23:28:15 GMT-07:00", + "subject": "AW: AW: Some subject", + "to": '"Other Person" ', + "text": "Original text", + }, + ), + # Outlook + ( + """-------- Original Message -------- +Subject: \tSome Newsletter +Date: \tFri, 19 Jun 2009 19:16:04 +0200 +From: \tfrom +Reply-To: \treply +To: \tto@example.com + +OHAI""", + { + "type": "forward", + "from": "from ", + "reply-to": "reply ", + "date": "Fri, 19 Jun 2009 19:16:04 +0200", + "subject": "Some Newsletter", + "to": "to@example.com", + "text": "OHAI", + }, + ), + # Some clients (Blackberry?) have weird whitespace rules + ( + """hello world + +-----Original Message----- +From: "Some One" + +Date: Sat, 22 Mar 2008 12:16:06 +To: + + +Subject: Antw: FW: html + + +OHAI... +""", + { + "text_top": "hello world", + "type": "forward", + "from": '"Some One" ', + "date": "Sat, 22 Mar 2008 12:16:06", + "subject": "Antw: FW: html", + "to": " ", + "text": "OHAI...", + }, + ), + # Just a quote + ( + """hello world + +Hey: This is very important + +> Lorem ipsum +> dolor sit amet +> adipiscing elit. + +-- +kthxbye +""", + { + "type": "quote", + "text_top": "hello world\n\nHey: This is very important", + "text": "Lorem ipsum\ndolor sit amet\nadipiscing elit.", + "text_bottom": "--\nkthxbye", + }, + ), + # No message + ( + """hello world + +Hey: This is very important + +> No quoted message (just one line). +""", + None, + ), + # No quote / headers in forwarded message + ( + """Begin forwarded message: +Hello +""", + { + "type": "forward", + "text": "Hello", + }, + ), + # Confusing email signature + ( + """Phone: 12345 +Fax: 67890 +Skype: foobar + +---------- Forwarded message ---------- +From: Someone +Subject: The email + +Email text. +""", + { + "text_top": "Phone: 12345\nFax: 67890\nSkype: foobar", + "type": "forward", + "from": "Someone ", + "subject": "The email", + "text": "Email text.", + }, + ), + # Long subject + ( + """---------- Forwarded message ---------- +From: Someone +Subject: The email has a very long and confusing subject with spans over +multiple lines. +To: Destination + +Email text. +""", + { + "type": "forward", + "from": "Someone ", + "to": "Destination ", + "subject": "The email has a very long and confusing subject with spans over multiple lines.", + "text": "Email text.", + }, + ), + # Reply + ( + """Hello world. + +On 2012-10-16 at 17:02 , Someone wrote: + +> Some quoted text +""", + { + "type": "reply", + "date": "2012-10-16 at 17:02", + "from": "Someone ", + "text_top": "Hello world.", + "text": "Some quoted text", + }, + ), + # Reply with line break + ( + """Hello world. + +On 2012-10-16 at 17:02 , Someone < +someone@example.com> wrote: + +> Some quoted text +""", + { + "type": "reply", + "date": "2012-10-16 at 17:02", + "from": "Someone ", + "text_top": "Hello world.", + "text": "Some quoted text", + }, + ), + # French email + ( + """ +De : Someone +Répondre à : Reply +Date : Wednesday, 17 September 2014 4:24 pm +À : "Someone Else" +Objet : Re: test subject + +Hello, thanks for your reply + """, + { + "type": "forward", + "date": "Wednesday, 17 September 2014 4:24 pm", + "from": "Someone ", + "reply-to": "Reply ", + "to": '"Someone Else" ', + "subject": "Re: test subject", + "text": "Hello, thanks for your reply", + }, + ), + # Forwarded French Apple Mail + ( + """ +Text before + +Début du message réexpédié : + +De: "Foo Bar" +Date: 14 novembre 2015 15:14:53 UTC+1 +À: "'Ham Spam'" +Objet: RE: The subject + +Text after +""", + { + "date": "14 novembre 2015 15:14:53 UTC+1", + "from": '"Foo Bar" ', + "subject": "RE: The subject", + "text": "Text after", + "text_top": "Text before", + "to": "\"'Ham Spam'\" ", + "type": "forward", + }, + ), + # Forwarded French Thunderbird + ( + """ +Text before + +-------- Message transféré -------- +Sujet : Re: Some subject +Date : Wed, 11 Nov 2015 12:31:25 +0100 +De : Foo Bar +Pour : Ham Spam + +Text after +""", + { + "date": "Wed, 11 Nov 2015 12:31:25 +0100", + "from": "Foo Bar ", + "subject": "Re: Some subject", + "text": "Text after", + "text_top": "Text before", + "to": "Ham Spam ", + "type": "forward", + }, + ), + # Forwarded Gmail Swedish + ( + """Hello + +---------- Vidarebefordrat meddelande ---------- +Från: Someone +Datum: 26 april 2013 20:13 +Ämne: Weekend Spanish classes +Till: recipient@example.com + +Spanish Classes +Learn Spanish +""", + { + "text_top": "Hello", + "type": "forward", + "from": "Someone ", + "date": "26 april 2013 20:13", + "subject": "Weekend Spanish classes", + "to": "recipient@example.com", + "text": "Spanish Classes\nLearn Spanish", + }, + ), + ], +) +def test_unwrap(text, expected): + assert unwrap(text) == expected