diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 536383e..97c0517 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/setup-python@v2 name: Install Python with: - python-version: 3.8 + python-version: 3.9 - run: | pip install packaging diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2b95118..b3e2057 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -11,7 +11,7 @@ jobs: lint: strategy: matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11' ] + python-version: ['3.9', '3.10', '3.11'] name: Lint ${{ matrix.python-version }} runs-on: 'ubuntu-20.04' container: python:${{ matrix.python-version }} @@ -21,14 +21,16 @@ jobs: - name: Lint code run: | - pip install lintlizard==0.18.0 "click<8.1" - lintlizard + pip install ruff==0.5.0 + ruff check + ruff format --check + ruff check --select I # Run tests test: strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11'] # Do not cancel any jobs when a single job fails fail-fast: false name: Python ${{ matrix.python-version }} diff --git a/pyproject.toml b/pyproject.toml index 96235fb..b60da53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,56 @@ -[tool.black] -target-version = ['py37'] -exclude = ''' -/( - \.git - | \.venv - | venv - | src -)/ -''' \ No newline at end of file +[tool.ruff] +target-version = "py39" +line-length = 79 + +[tool.ruff.lint] +ignore = [ + "ISC001", + "PLR2004", + "TRY003", + # Some patterns contain special characters. + "PLR0911", + "RUF001", +] +select = [ + "A001", + "B", + "C", + "E", + "EXE", + "F", + "G", + "I", + "INP", + "ISC", + "N", + "PGH", + "PIE", + "PL", + "PT", + "RET", + "RUF", + "S", + "SIM", + "T", + "TCH", + "TID25", + "TRY", + "UP", + "W", + # Consider enabling later. + # "ANN", + # "PTH", +] + +[tool.ruff.lint.isort] +combine-as-imports = true +forced-separate = ["tests"] + +[tool.ruff.lint.mccabe] +max-complexity = 15 + +[tool.ruff.lint.pylint] +max-branches = 16 + +[tool.ruff.lint.per-file-ignores] +"tests/test_quotequail.py" = ["E501", "PT009"] diff --git a/quotequail/__init__.py b/quotequail/__init__.py index 4ab2e30..04300b0 100644 --- a/quotequail/__init__.py +++ b/quotequail/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # quotequail # a library that identifies quoted text in email messages @@ -16,12 +15,14 @@ def quote(text, limit=1000): Example: [(True, 'expanded text'), (False, '> Some quoted text')] - Unless the limit param is set to None, the text will automatically be quoted - starting at the line where the limit is reached. + Unless the limit param is set to None, the text will automatically be + quoted starting at the line where the limit is reached. """ lines = text.split("\n") - found = _internal.find_quote_position(lines, _patterns.MAX_WRAP_LINES, limit) + found = _internal.find_quote_position( + lines, _patterns.MAX_WRAP_LINES, limit + ) if found is not None: return [ @@ -49,11 +50,11 @@ def quote_html(html, limit=1000): if found is None: # No quoting found and we're below limit. We're done. return [(True, _html.render_html_tree(tree))] - else: - start_tree = _html.slice_tree( - tree, start_refs, end_refs, (0, found + 1), html_copy=html - ) - end_tree = _html.slice_tree(tree, start_refs, end_refs, (found + 1, None)) + + start_tree = _html.slice_tree( + tree, start_refs, end_refs, (0, found + 1), html_copy=html + ) + end_tree = _html.slice_tree(tree, start_refs, end_refs, (found + 1, None)) return [ (True, _html.render_html_tree(start_tree)), @@ -83,35 +84,37 @@ def unwrap(text): _patterns.MIN_HEADER_LINES, _patterns.MIN_QUOTED_LINES, ) - if result: - typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result + if not result: + return None - text_top = lines[slice(*top_range)] if top_range else "" - text = lines[slice(*main_range)] if main_range else "" - text_bottom = lines[slice(*bottom_range)] if bottom_range else "" + typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result - if needs_unindent: - text = _internal.unindent_lines(text) + text_top = lines[slice(*top_range)] if top_range else "" + text = lines[slice(*main_range)] if main_range else "" + text_bottom = lines[slice(*bottom_range)] if bottom_range else "" - result = { - "type": typ, - } + if needs_unindent: + text = _internal.unindent_lines(text) - text = "\n".join(text).strip() - text_top = "\n".join(text_top).strip() - text_bottom = "\n".join(text_bottom).strip() + result = { + "type": typ, + } - if text: - result["text"] = text - if text_top: - result["text_top"] = text_top - if text_bottom: - result["text_bottom"] = text_bottom + text = "\n".join(text).strip() + text_top = "\n".join(text_top).strip() + text_bottom = "\n".join(text_bottom).strip() - if hdrs: - result.update(hdrs) + if text: + result["text"] = text + if text_top: + result["text_top"] = text_top + if text_bottom: + result["text_bottom"] = text_bottom - return result + if hdrs: + result.update(hdrs) + + return result def unwrap_html(html): @@ -164,7 +167,9 @@ def unwrap_html(html): result["html_bottom"] = html_bottom if main_range: - main_tree = _html.slice_tree(tree, start_refs, end_refs, main_range) + main_tree = _html.slice_tree( + tree, start_refs, end_refs, main_range + ) if needs_unindent: _html.unindent_tree(main_tree) html = _html.render_html_tree(main_tree) diff --git a/quotequail/_html.py b/quotequail/_html.py index 1bc19d7..f393852 100644 --- a/quotequail/_html.py +++ b/quotequail/_html.py @@ -220,7 +220,7 @@ def get_html_tree(html): for el in tree.iter(): if el.nsmap or (isinstance(el.tag, str) and ":" in el.tag): if el.nsmap: - actual_tag_name = "{}:{}".format(list(el.nsmap.keys())[0], el.tag) + actual_tag_name = f"{next(iter(el.nsmap.keys()))}:{el.tag}" else: actual_tag_name = el.tag el.tag = "span" @@ -355,7 +355,9 @@ def _trim_spaces(text): line_break = tag_name == "br" and state == BEGIN is_block = tag_name not in INLINE_TAGS is_forward = ( - is_block and state == BEGIN and el.attrib.get("style") in FORWARD_STYLES + is_block + and state == BEGIN + and el.attrib.get("style") in FORWARD_STYLES ) if is_block or line_break: @@ -371,7 +373,12 @@ def _trim_spaces(text): if is_forward: # Simulate forward - yield (end_ref, end_ref, start_indentation_level, FORWARD_LINE) + yield ( + end_ref, + end_ref, + start_indentation_level, + FORWARD_LINE, + ) counter += 1 if max_lines is not None and counter > max_lines: return @@ -384,7 +391,7 @@ def _trim_spaces(text): line += token else: - raise RuntimeError("invalid token: {}".format(token)) + raise RuntimeError(f"invalid token: {token}") line = _trim_spaces(line) if line: @@ -402,9 +409,8 @@ def indented_tree_line_generator(el, max_lines=None): gen = tree_line_generator(el, max_lines) for start_ref, end_ref, indentation_level, line in gen: # Escape line - if line.startswith(">"): - line = "\\" + line - yield start_ref, end_ref, "> " * indentation_level + line + full_line = "\\" + line if line.startswith(">") else line + yield start_ref, end_ref, "> " * indentation_level + full_line def get_line_info(tree, max_lines=None): @@ -417,5 +423,4 @@ def get_line_info(tree, max_lines=None): line_gen_result = list(zip(*line_gen)) if line_gen_result: return line_gen_result - else: - return [], [], [] + return [], [], [] diff --git a/quotequail/_internal.py b/quotequail/_internal.py index 389713d..c76aa60 100644 --- a/quotequail/_internal.py +++ b/quotequail/_internal.py @@ -150,8 +150,12 @@ def parse_reply(line): "from": user.strip(), } + return None + -def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines): +def find_unwrap_start( + lines, max_wrap_lines, min_header_lines, min_quoted_lines +): """ Find the starting point of a wrapped email. Returns a tuple containing (start_line_number, end_line_number, type), where type can be one of the @@ -193,8 +197,7 @@ def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines) continue if not peek_line.startswith(">"): break - else: - matched_lines += 1 + matched_lines += 1 if matched_lines >= min_quoted_lines: return n, n, "quoted" @@ -202,7 +205,8 @@ def find_unwrap_start(lines, max_wrap_lines, min_header_lines, min_quoted_lines) match = HEADER_RE.match(line) if ( match - and len(extract_headers(lines[n:], max_wrap_lines)[0]) >= min_header_lines + and len(extract_headers(lines[n:], max_wrap_lines)[0]) + >= min_header_lines ): return n, n, "headers" @@ -244,7 +248,9 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines): main_type = typ if typ == "reply": - reply_headers = parse_reply(join_wrapped_lines(lines[start : end + 1])) + reply_headers = parse_reply( + join_wrapped_lines(lines[start : end + 1]) + ) if reply_headers: headers.update(reply_headers) @@ -263,7 +269,9 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines): unquoted, max_wrap_lines, min_header_lines, min_quoted_lines ) if typ == "headers": - hdrs, hdrs_length = extract_headers(unquoted[start3:], max_wrap_lines) + hdrs, hdrs_length = extract_headers( + unquoted[start3:], max_wrap_lines + ) if hdrs: headers.update(hdrs) rest2_start = quoted_start + start3 + hdrs_length @@ -275,43 +283,51 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines): (rest_start, None), True, ) - else: - return ( - main_type, - (0, start), - headers, - (quoted_start, rest_start), - (rest_start, None), - True, - ) + return ( + main_type, + (0, start), + headers, + (quoted_start, rest_start), + (rest_start, None), + True, + ) - elif typ == "headers": - hdrs, hdrs_length = extract_headers(lines[start + 1 :], max_wrap_lines) + if typ == "headers": + hdrs, hdrs_length = extract_headers( + lines[start + 1 :], max_wrap_lines + ) if hdrs: headers.update(hdrs) rest_start = start + 1 + hdrs_length - return main_type, (0, start), headers, (rest_start, None), None, False - else: - # Didn't find quoted section or headers, assume that everything - # below is the qouted text. return ( main_type, (0, start), headers, - (start + (start2 or 0) + 1, None), + (rest_start, None), None, False, ) + # Didn't find quoted section or headers, assume that everything + # below is the qouted text. + return ( + main_type, + (0, start), + headers, + (start + (start2 or 0) + 1, None), + None, + False, + ) + # We just found headers, which usually indicates a forwarding. - elif typ == "headers": + if typ == "headers": main_type = "forward" hdrs, hdrs_length = extract_headers(lines[start:], max_wrap_lines) rest_start = start + hdrs_length return main_type, (0, start), hdrs, (rest_start, None), None, False # We found quoted text. Headers may be within the quoted text. - elif typ == "quoted": + if typ == "quoted": unquoted = unindent_lines(lines[start:]) rest_start = start + len(unquoted) start2, end2, typ = find_unwrap_start( @@ -319,7 +335,9 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines): ) if typ == "headers": main_type = "forward" - hdrs, hdrs_length = extract_headers(unquoted[start2:], max_wrap_lines) + hdrs, hdrs_length = extract_headers( + unquoted[start2:], max_wrap_lines + ) rest2_start = start + hdrs_length return ( main_type, @@ -329,13 +347,15 @@ def unwrap(lines, max_wrap_lines, min_header_lines, min_quoted_lines): (rest_start, None), True, ) - else: - main_type = "quote" - return ( - main_type, - (None, start), - None, - (start, rest_start), - (rest_start, None), - True, - ) + + main_type = "quote" + return ( + main_type, + (None, start), + None, + (start, rest_start), + (rest_start, None), + True, + ) + + return None diff --git a/quotequail/_patterns.py b/quotequail/_patterns.py index e5ddb03..788adf8 100644 --- a/quotequail/_patterns.py +++ b/quotequail/_patterns.py @@ -1,7 +1,4 @@ -# -*- coding: utf-8 -*- - import re -from typing import List REPLY_PATTERNS = [ "^On (.*) wrote:$", # apple mail/gmail reply @@ -14,7 +11,9 @@ "([0-9]{4}/[0-9]{1,2}/[0-9]{1,2}) (.* <.*@.*>)$", # gmail (?) reply ] -REPLY_DATE_SPLIT_REGEX = re.compile(r"^(.*(:[0-9]{2}( [apAP]\.?[mM]\.?)?)), (.*)?$") +REPLY_DATE_SPLIT_REGEX = re.compile( + r"^(.*(:[0-9]{2}( [apAP]\.?[mM]\.?)?)), (.*)?$" +) FORWARD_MESSAGES = [ # apple mail forward @@ -42,7 +41,7 @@ FORWARD_PATTERNS = ( [ - "^{}$".format(FORWARD_LINE), + f"^{FORWARD_LINE}$", ] + [f"^---+ ?{p} ?---+$" for p in FORWARD_MESSAGES] + [f"^{p}:$" for p in FORWARD_MESSAGES] @@ -101,7 +100,11 @@ "forward": [re.compile(regex) for regex in FORWARD_PATTERNS], } -COMPILED_PATTERNS: List[re.Pattern] = sum(COMPILED_PATTERN_MAP.values(), []) +COMPILED_PATTERNS: list[re.Pattern] = [ + pattern + for patterns in COMPILED_PATTERN_MAP.values() + for pattern in patterns +] MULTIPLE_WHITESPACE_RE = re.compile(r"\s+") diff --git a/setup.py b/setup.py index 8ff40b7..d9f2fa2 100644 --- a/setup.py +++ b/setup.py @@ -12,10 +12,13 @@ url="http://github.com/closeio/quotequail", license="MIT", author="Thomas Steinacher", - author_email="engineering@close.io", + author_email="engineering@close.com", maintainer="Thomas Steinacher", - maintainer_email="engineering@close.io", - description="A library that identifies quoted text in plain text and HTML email messages.", + maintainer_email="engineering@close.com", + description=( + "A library that identifies quoted text in plain text and HTML email " + "messages." + ), long_description=__doc__, packages=[ "quotequail", diff --git a/tests/test_quotequail.py b/tests/test_quotequail.py index 29ada32..6ec6e8d 100644 --- a/tests/test_quotequail.py +++ b/tests/test_quotequail.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - import os import unittest @@ -8,7 +6,9 @@ class FileMixin: def read_file(self, name): - with open(os.path.join(os.path.dirname(__file__), "files", name), "rb") as f: + with open( + os.path.join(os.path.dirname(__file__), "files", name), "rb" + ) as f: return f.read().decode("utf8") def assert_equal_to_file(self, string, name): @@ -111,7 +111,10 @@ def test_quote_forward_2(self): """ ), [ - (True, "Hello world.\n\n---------- Forwarded message ----------"), + ( + True, + "Hello world.\n\n---------- Forwarded message ----------", + ), ( False, "From: Someone \nSubject: The email\n\nSome quoted text.\n", @@ -265,7 +268,9 @@ def test_no_quote(self): def test_limit(self): self.assertEqual( - quote_html("""

One

Two

Three

Four

""", limit=3), + quote_html( + """

One

Two

Three

Four

""", limit=3 + ), [ (True, "

One

Two

Three

"), (False, "

Four

"), @@ -591,7 +596,6 @@ def test_outlook_forward(self): "date": "Fri, 19 Jun 2009 19:16:04 +0200", "subject": "Some Newsletter", "to": "to@example.com", - "reply-to": "reply ", "text": "OHAI", }, ) @@ -958,7 +962,9 @@ def test_outlook_forward(self): self.assertEqual(result["to"], "Foo Bar (foo@bar.example)") self.assertEqual(result["date"], "Wednesday, July 09, 2014 10:27 AM") self.assertEqual(result["subject"], "The subject!") - self.assert_equal_to_file(result["html"], "outlook_forward_unwrapped.html") + self.assert_equal_to_file( + result["html"], "outlook_forward_unwrapped.html" + ) self.assert_equal_to_file( result["html_top"], "outlook_forward_unwrapped_top.html" ) @@ -970,28 +976,35 @@ def test_thunderbird_forward(self): self.assertEqual(result["type"], "forward") self.assertEqual(result["from"], "John Doe ") self.assertEqual(result["to"], "Foo Bar ") - self.assertEqual(result["date"], "Tue, 3 May 2016 14:54:27 +0200 (CEST)") + self.assertEqual( + result["date"], "Tue, 3 May 2016 14:54:27 +0200 (CEST)" + ) self.assertEqual(result["subject"], "Re: Example subject") self.assertNotIn("html_top", result) - self.assert_equal_to_file(result["html"], "thunderbird_forward_unwrapped.html") + self.assert_equal_to_file( + result["html"], "thunderbird_forward_unwrapped.html" + ) self.assertNotIn("html_bottom", result) def test_mailru_forward(self): data = self.read_file("mailru_forward.html") result = unwrap_html(data) self.assertEqual(result["type"], "forward") - self.assertEqual(result["from"], "Иван Иванов ") + self.assertEqual( + result["from"], "Иван Иванов " + ) self.assertEqual(result["to"], "Петр Петров ") self.assertEqual(result["date"], "Среда, 14 июня 2017, 15:19 +03:00") self.assertEqual(result["subject"], "Тестовая тема") self.assertNotIn("html_top", result) - self.assert_equal_to_file(result["html"], "mailru_forward_unwrapped.html") + self.assert_equal_to_file( + result["html"], "mailru_forward_unwrapped.html" + ) self.assertNotIn("html_bottom", result) class InternalTestCase(unittest.TestCase): def test_parse_reply(self): - from quotequail._internal import parse_reply data = parse_reply( @@ -999,7 +1012,10 @@ def test_parse_reply(self): ) self.assertEqual( data, - {"date": "24.02.2015 um 22:48", "from": "John Doe "}, + { + "date": "24.02.2015 um 22:48", + "from": "John Doe ", + }, ) data = parse_reply( @@ -1029,7 +1045,10 @@ def test_parse_reply(self): ) self.assertEqual( data, - {"date": "2016-03-14, at 20:26", "from": "John Doe "}, + { + "date": "2016-03-14, at 20:26", + "from": "John Doe ", + }, ) data = parse_reply( @@ -1048,14 +1067,21 @@ def test_parse_reply(self): ) self.assertEqual( data, - {"date": "02.10.2013 \xe0 11:13", "from": "John Doe "}, + { + "date": "02.10.2013 \xe0 11:13", + "from": "John Doe ", + }, ) data = parse_reply("El 11/07/2012 06:13 p.m., John Doe escribió:") - self.assertEqual(data, {"date": "11/07/2012 06:13 p.m.", "from": "John Doe"}) + self.assertEqual( + data, {"date": "11/07/2012 06:13 p.m.", "from": "John Doe"} + ) data = parse_reply("El 06/04/2010, a las 13:13, John Doe escribió:") - self.assertEqual(data, {"date": "06/04/2010, a las 13:13", "from": "John Doe"}) + self.assertEqual( + data, {"date": "06/04/2010, a las 13:13", "from": "John Doe"} + ) data = parse_reply("2009/5/12 John Doe ") self.assertEqual( @@ -1071,7 +1097,10 @@ def test_parse_reply(self): ) self.assertEqual( data, - {"date": "24 februari 2015 22:48", "from": "John Doe "}, + { + "date": "24 februari 2015 22:48", + "from": "John Doe ", + }, ) # Brazillian portuguese @@ -1094,10 +1123,15 @@ def test_extract_headers(self): self.assertEqual(extract_headers([], 2), ({}, 0)) self.assertEqual(extract_headers(["test"], 2), ({}, 0)) self.assertEqual( - extract_headers(["From: b", "To: c"], 2), ({"from": "b", "to": "c"}, 2) + extract_headers(["From: b", "To: c"], 2), + ({"from": "b", "to": "c"}, 2), + ) + self.assertEqual( + extract_headers(["From: b", "foo"], 2), ({"from": "b foo"}, 2) + ) + self.assertEqual( + extract_headers(["From: b", "foo"], 1), ({"from": "b"}, 1) ) - self.assertEqual(extract_headers(["From: b", "foo"], 2), ({"from": "b foo"}, 2)) - self.assertEqual(extract_headers(["From: b", "foo"], 1), ({"from": "b"}, 1)) self.assertEqual( extract_headers(["From: b", "To: c", "", "other line"], 2), ({"from": "b", "to": "c"}, 2), @@ -1124,7 +1158,11 @@ def test_extract_headers(self): ) self.assertEqual( extract_headers( - ["From: some very very very long name <", "verylong@example.com>"], 1 + [ + "From: some very very very long name <", + "verylong@example.com>", + ], + 1, ), ( { @@ -1170,7 +1208,9 @@ def test_tree_line_generator(self): ], ) - tree = _html.get_html_tree("
hi
world
") + tree = _html.get_html_tree( + "
hi
world
" + ) data = list(_html.tree_line_generator(tree)) div = tree.xpath("div")[0] blockquote = tree.xpath("div/blockquote")[0] @@ -1206,12 +1246,15 @@ def test_trim_after(self): tree = _html.get_html_tree(html) _html.trim_tree_after(tree.find("div/span")) - self.assertEqual(_html.render_html_tree(tree), "
AB
") + self.assertEqual( + _html.render_html_tree(tree), "
AB
" + ) tree = _html.get_html_tree(html) _html.trim_tree_after(tree.find("div/span[2]")) self.assertEqual( - _html.render_html_tree(tree), "
ABCD
" + _html.render_html_tree(tree), + "
ABCD
", ) tree = _html.get_html_tree(html) @@ -1220,7 +1263,9 @@ def test_trim_after(self): tree = _html.get_html_tree(html) _html.trim_tree_after(tree.find("div/span[2]"), include_element=False) - self.assertEqual(_html.render_html_tree(tree), "
ABC
") + self.assertEqual( + _html.render_html_tree(tree), "
ABC
" + ) def test_trim_before(self): from quotequail import _html @@ -1230,16 +1275,21 @@ def test_trim_before(self): tree = _html.get_html_tree(html) _html.trim_tree_before(tree.find("div/span")) self.assertEqual( - _html.render_html_tree(tree), "
BCDE
" + _html.render_html_tree(tree), + "
BCDE
", ) tree = _html.get_html_tree(html) _html.trim_tree_before(tree.find("div/span[2]")) - self.assertEqual(_html.render_html_tree(tree), "
DE
") + self.assertEqual( + _html.render_html_tree(tree), "
DE
" + ) tree = _html.get_html_tree(html) _html.trim_tree_before(tree.find("div/span"), include_element=False) - self.assertEqual(_html.render_html_tree(tree), "
CDE
") + self.assertEqual( + _html.render_html_tree(tree), "
CDE
" + ) tree = _html.get_html_tree(html) _html.trim_tree_before(tree.find("div/span[2]"), include_element=False)