Mypy, Python 3.10 - 3.12

closeio · Jun 27, 2024 · 1a64acb · 1a64acb
1 parent 7acc68c
commit 1a64acb
Show file tree

Hide file tree

Showing 8 changed files with 168 additions and 140 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -22,7 +22,7 @@ jobs:
       - uses: actions/setup-python@v2
         name: Install Python
         with:
-          python-version: 3.9
+          python-version: '3.10'
 
       - run: |
           pip install packaging

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -11,7 +11,7 @@ jobs:
   lint:
     strategy:
       matrix:
-        python-version: ['3.9', '3.10', '3.11']
+        python-version: ['3.10', '3.11', '3.12']
     name: Lint ${{ matrix.python-version }}
     runs-on: 'ubuntu-20.04'
     container: python:${{ matrix.python-version }}
@@ -26,11 +26,16 @@ jobs:
           ruff format --check
           ruff check --select I
 
+      - name: Type check code
+        run: |
+          pip install mypy==1.10.1
+          mypy
+
   # Run tests
   test:
     strategy:
       matrix:
-        python-version: ['3.9', '3.10', '3.11']
+        python-version: ['3.10', '3.11', '3.12']
       # Do not cancel any jobs when a single job fails
       fail-fast: false
     name: Python ${{ matrix.python-version }}

diff --git a/pyproject.toml b/pyproject.toml
@@ -54,3 +54,14 @@ max-branches = 16
 
 [tool.ruff.lint.per-file-ignores]
 "tests/test_quotequail.py" = ["E501", "PT009"]
+
+[tool.mypy]
+python_version = "3.10"
+ignore_missing_imports = true
+no_implicit_optional = true
+strict_equality = true
+follow_imports = "normal"
+warn_unreachable = true
+show_error_context = true
+pretty = true
+files = "quotequail"
diff --git a/quotequail/__init__.py b/quotequail/__init__.py
@@ -7,7 +7,7 @@
 __all__ = ["quote", "quote_html", "unwrap", "unwrap_html"]
 
 
-def quote(text, limit=1000):
+def quote(text: str, limit: int = 1000) -> list[tuple[bool, str]]:
     """
     Take a plain text message as an argument, return a list of tuples. The
     first argument of the tuple denotes whether the text should be expanded by
@@ -33,7 +33,7 @@ def quote(text, limit=1000):
     return [(True, text)]
 
 
-def quote_html(html, limit=1000):
+def quote_html(html: str, limit: int = 1000) -> list[tuple[bool, str]]:
     """
     Like quote(), but takes an HTML message as an argument. The limit param
     represents the maximum number of lines to traverse until quoting the rest
@@ -62,7 +62,7 @@ def quote_html(html, limit=1000):
     ]
 
 
-def unwrap(text):
+def unwrap(text: str) -> dict[str, str] | None:
     """
     If the passed text is the text body of a forwarded message, a reply, or
     contains quoted text, a dictionary with the following keys is returned:
@@ -78,31 +78,33 @@ def unwrap(text):
     """
     lines = text.split("\n")
 
-    result = _internal.unwrap(
+    unwrap_result = _internal.unwrap(
         lines,
         _patterns.MAX_WRAP_LINES,
         _patterns.MIN_HEADER_LINES,
         _patterns.MIN_QUOTED_LINES,
     )
-    if not result:
+    if not unwrap_result:
         return None
 
-    typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result
+    typ, top_range, hdrs, main_range, bottom_range, needs_unindent = (
+        unwrap_result
+    )
 
-    text_top = lines[slice(*top_range)] if top_range else ""
-    text = lines[slice(*main_range)] if main_range else ""
-    text_bottom = lines[slice(*bottom_range)] if bottom_range else ""
+    text_top_lines = lines[slice(*top_range)] if top_range else []
+    text_lines = lines[slice(*main_range)] if main_range else []
+    text_bottom_lines = lines[slice(*bottom_range)] if bottom_range else []
 
     if needs_unindent:
-        text = _internal.unindent_lines(text)
+        text_lines = _internal.unindent_lines(text_lines)
 
     result = {
         "type": typ,
     }
 
-    text = "\n".join(text).strip()
-    text_top = "\n".join(text_top).strip()
-    text_bottom = "\n".join(text_bottom).strip()
+    text = "\n".join(text_lines).strip()
+    text_top = "\n".join(text_top_lines).strip()
+    text_bottom = "\n".join(text_bottom_lines).strip()
 
     if text:
         result["text"] = text
@@ -117,7 +119,7 @@ def unwrap(text):
     return result
 
 
-def unwrap_html(html):
+def unwrap_html(html: str) -> dict[str, str] | None:
     """
     If the passed HTML is the HTML body of a forwarded message, a dictionary
     with the following keys is returned:
@@ -137,38 +139,40 @@ def unwrap_html(html):
 
     start_refs, end_refs, lines = _html.get_line_info(tree)
 
-    result = _internal.unwrap(lines, 1, _patterns.MIN_HEADER_LINES, 1)
+    unwrap_result = _internal.unwrap(lines, 1, _patterns.MIN_HEADER_LINES, 1)
 
-    if result:
-        typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result
+    if unwrap_result:
+        typ, top_range, hdrs, main_range, bottom_range, needs_unindent = (
+            unwrap_result
+        )
 
         result = {
             "type": typ,
         }
 
-        top_range = _html.trim_slice(lines, top_range)
-        main_range = _html.trim_slice(lines, main_range)
-        bottom_range = _html.trim_slice(lines, bottom_range)
+        top_range_slice = _html.trim_slice(lines, top_range)
+        main_range_slice = _html.trim_slice(lines, main_range)
+        bottom_range_slice = _html.trim_slice(lines, bottom_range)
 
-        if top_range:
+        if top_range_slice:
             top_tree = _html.slice_tree(
-                tree, start_refs, end_refs, top_range, html_copy=html
+                tree, start_refs, end_refs, top_range_slice, html_copy=html
             )
             html_top = _html.render_html_tree(top_tree)
             if html_top:
                 result["html_top"] = html_top
 
-        if bottom_range:
+        if bottom_range_slice:
             bottom_tree = _html.slice_tree(
-                tree, start_refs, end_refs, bottom_range, html_copy=html
+                tree, start_refs, end_refs, bottom_range_slice, html_copy=html
             )
             html_bottom = _html.render_html_tree(bottom_tree)
             if html_bottom:
                 result["html_bottom"] = html_bottom
 
-        if main_range:
+        if main_range_slice:
             main_tree = _html.slice_tree(
-                tree, start_refs, end_refs, main_range
+                tree, start_refs, end_refs, main_range_slice
             )
             if needs_unindent:
                 _html.unindent_tree(main_tree)

diff --git a/quotequail/_html.py b/quotequail/_html.py
@@ -1,9 +1,11 @@
 # HTML utils
+from collections.abc import Iterator
 
 import lxml.etree
 import lxml.html
 
 from ._patterns import FORWARD_LINE, FORWARD_STYLES, MULTIPLE_WHITESPACE_RE
+from .types import Element, ElementRef
 
 INLINE_TAGS = [
     "a",
@@ -27,7 +29,7 @@
 END = "end"
 
 
-def trim_tree_after(element, include_element=True):
+def trim_tree_after(element: Element, include_element: bool = True):
     """
     Remove the document tree following the given element. If include_element
     is True, the given element is kept in the tree, otherwise it is removed.
@@ -44,7 +46,9 @@ def trim_tree_after(element, include_element=True):
         el = parent_el
 
 
-def trim_tree_before(element, include_element=True, keep_head=True):
+def trim_tree_before(
+    element: Element, include_element: bool = True, keep_head: bool = True
+) -> None:
     """
     Remove the document tree preceding the given element. If include_element
     is True, the given element is kept in the tree, otherwise it is removed.
@@ -66,7 +70,9 @@ def trim_tree_before(element, include_element=True, keep_head=True):
         el = parent_el
 
 
-def trim_slice(lines, slice_tuple):
+def trim_slice(
+    lines: list[str], slice_tuple: tuple[int | None, int | None] | None
+) -> tuple[int, int] | None:
     """
     Trim a slice tuple (begin, end) so it starts at the first non-empty line
     (obtained via indented_tree_line_generator / get_line_info) and ends at the
@@ -97,7 +103,7 @@ def _empty(line):
     return (slice_start, slice_end)
 
 
-def unindent_tree(element):
+def unindent_tree(element: Element) -> None:
     """
     Remove the outermost indent. For example, the tree
     "<div>A<blockquote>B<div>C<blockquote>D</blockquote>E</div>F</blockquote>G</div>"
@@ -111,7 +117,13 @@ def unindent_tree(element):
             return
 
 
-def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
+def slice_tree(
+    tree: Element,
+    start_refs: list[ElementRef | None],
+    end_refs: list[ElementRef | None],
+    slice_tuple: tuple[int | None, int | None] | None,
+    html_copy: str | None = None,
+):
     """
     Slice the HTML tree with the given start_refs and end_refs (obtained via
     get_line_info) at the given slice_tuple, a tuple (start, end) containing
@@ -190,27 +202,27 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
     return new_tree
 
 
-def get_html_tree(html):
+def get_html_tree(html: str) -> Element:
     """
     Given the HTML string, returns a LXML tree object. The tree is wrapped in
     <div> elements if it doesn't have a top level tag or parsing would
     otherwise result in an error. The wrapping can be later removed with
     strip_wrapping().
     """
     parser = lxml.html.HTMLParser(encoding="utf-8")
-    html = html.encode("utf8")
+    htmlb = html.encode("utf8")
 
     try:
-        tree = lxml.html.fromstring(html, parser=parser)
+        tree = lxml.html.fromstring(htmlb, parser=parser)
     except lxml.etree.Error:
         # E.g. empty document. Use dummy <div>
         tree = lxml.html.fromstring("<div></div>")
 
     # If the document doesn't start with a top level tag, wrap it with a <div>
     # that will be later stripped out for consistent behavior.
     if tree.tag not in lxml.html.defs.top_level_tags:
-        html = b"<div>" + html + b"</div>"
-        tree = lxml.html.fromstring(html, parser=parser)
+        htmlb = b"<div>" + htmlb + b"</div>"
+        tree = lxml.html.fromstring(htmlb, parser=parser)
 
     # HACK for Outlook emails, where tags like <o:p> are rendered as <p>. We
     # can generally ignore these tags so we replace them with <span>, which
@@ -229,7 +241,7 @@ def get_html_tree(html):
     return tree
 
 
-def strip_wrapping(html):
+def strip_wrapping(html: str) -> str:
     """
     Remove the wrapping that might have resulted when using get_html_tree().
     """
@@ -238,7 +250,7 @@ def strip_wrapping(html):
     return html.strip()
 
 
-def render_html_tree(tree):
+def render_html_tree(tree: Element) -> str:
     """
     Render the given HTML tree, and strip any wrapping that was applied in
     get_html_tree().
@@ -257,13 +269,15 @@ def render_html_tree(tree):
     return strip_wrapping(html)
 
 
-def is_indentation_element(element):
+def is_indentation_element(element: Element) -> bool:
     if isinstance(element.tag, str):
         return element.tag.lower() == "blockquote"
     return False
 
 
-def tree_token_generator(el, indentation_level=0):
+def tree_token_generator(
+    el: Element, indentation_level: int = 0
+) -> Iterator[None | tuple[Element, str, int] | str]:
     """
     Yield tokens for the given HTML element as follows:
 
@@ -296,7 +310,13 @@ def tree_token_generator(el, indentation_level=0):
     yield el.tail
 
 
-def tree_line_generator(el, max_lines=None):
+def tree_line_generator(
+    el: Element, max_lines: int | None = None
+) -> Iterator[
+    tuple[
+        tuple[ElementRef, str] | None, tuple[ElementRef, str] | None, int, str
+    ]
+]:
     """
     Iterate through an LXML tree and yield a tuple per line.
 
@@ -327,7 +347,7 @@ def tree_line_generator(el, max_lines=None):
     - ((<Element blockquote>, 'end'), (<Element div>, 'end'), 0, 'world')
     """
 
-    def _trim_spaces(text):
+    def _trim_spaces(text: str) -> str:
         return MULTIPLE_WHITESPACE_RE.sub(" ", text).strip()
 
     counter = 1
@@ -341,7 +361,7 @@ def _trim_spaces(text):
     start_ref = None
 
     # The indentation level at the start of the line.
-    start_indentation_level = None
+    start_indentation_level = 0
 
     for token in tree_token_generator(el):
         if token is None:
@@ -393,12 +413,17 @@ def _trim_spaces(text):
         else:
             raise RuntimeError(f"invalid token: {token}")
 
+    """
+    TODO: wrong type -- this yields a bare str, not the tuple declared in the
+    return annotation -- and would trigger an error if reached.
     line = _trim_spaces(line)
     if line:
         yield line
+    """
 
 
-def indented_tree_line_generator(el, max_lines=None):
+def indented_tree_line_generator(
+    el: Element, max_lines: int | None = None
+) -> Iterator[tuple[ElementRef | None, ElementRef | None, str]]:
     r"""
     Like tree_line_generator, but yields tuples (start_ref, end_ref, line),
     where the line already takes the indentation into account by having "> "
@@ -413,14 +438,19 @@ def indented_tree_line_generator(el, max_lines=None):
         yield start_ref, end_ref, "> " * indentation_level + full_line
 
 
-def get_line_info(tree, max_lines=None):
+def get_line_info(
+    tree: Element, max_lines: int | None = None
+) -> tuple[list[ElementRef | None], list[ElementRef | None], list[str]]:
     """
     Shortcut for indented_tree_line_generator() that returns an array of
     start references, an array of corresponding end references (see
     tree_line_generator() docs), and an array of corresponding lines.
     """
     line_gen = indented_tree_line_generator(tree, max_lines=max_lines)
-    line_gen_result = list(zip(*line_gen))
+    line_gen_result: (
+        tuple[list[ElementRef | None], list[ElementRef | None], list[str]]
+        | tuple[()]
+    ) = tuple(zip(*line_gen))
     if line_gen_result:
         return line_gen_result
     return [], [], []