Skip to content

Commit

Permalink
Mypy, Python 3.10 - 3.12
Browse files Browse the repository at this point in the history
  • Loading branch information
thomasst committed Jun 27, 2024
1 parent 7acc68c commit 1a64acb
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 140 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
- uses: actions/setup-python@v2
name: Install Python
with:
python-version: 3.9
python-version: 3.10

- run: |
pip install packaging
Expand Down
9 changes: 7 additions & 2 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
lint:
strategy:
matrix:
python-version: ['3.9', '3.10', '3.11']
python-version: ['3.10', '3.11', '3.12']
name: Lint ${{ matrix.python-version }}
runs-on: 'ubuntu-20.04'
container: python:${{ matrix.python-version }}
Expand All @@ -26,11 +26,16 @@ jobs:
ruff format --check
ruff check --select I
- name: Type check code
run: |
pip install mypy==1.10.1
mypy
# Run tests
test:
strategy:
matrix:
python-version: ['3.9', '3.10', '3.11']
python-version: ['3.10', '3.11', '3.12']
# Do not cancel any jobs when a single job fails
fail-fast: false
name: Python ${{ matrix.python-version }}
Expand Down
11 changes: 11 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,14 @@ max-branches = 16

[tool.ruff.lint.per-file-ignores]
"tests/test_quotequail.py" = ["E501", "PT009"]

[tool.mypy]
python_version = "3.10"
ignore_missing_imports = true
no_implicit_optional = true
strict_equality = true
follow_imports = "normal"
warn_unreachable = true
show_error_context = true
pretty = true
files = "quotequail"
56 changes: 30 additions & 26 deletions quotequail/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
__all__ = ["quote", "quote_html", "unwrap", "unwrap_html"]


def quote(text, limit=1000):
def quote(text: str, limit: int = 1000) -> list[tuple[bool, str]]:
"""
Take a plain text message as an argument, return a list of tuples. The
first argument of the tuple denotes whether the text should be expanded by
Expand All @@ -33,7 +33,7 @@ def quote(text, limit=1000):
return [(True, text)]


def quote_html(html, limit=1000):
def quote_html(html: str, limit: int = 1000) -> list[tuple[bool, str]]:
"""
Like quote(), but takes an HTML message as an argument. The limit param
represents the maximum number of lines to traverse until quoting the rest
Expand Down Expand Up @@ -62,7 +62,7 @@ def quote_html(html, limit=1000):
]


def unwrap(text):
def unwrap(text: str) -> dict[str, str] | None:
"""
If the passed text is the text body of a forwarded message, a reply, or
contains quoted text, a dictionary with the following keys is returned:
Expand All @@ -78,31 +78,33 @@ def unwrap(text):
"""
lines = text.split("\n")

result = _internal.unwrap(
unwrap_result = _internal.unwrap(
lines,
_patterns.MAX_WRAP_LINES,
_patterns.MIN_HEADER_LINES,
_patterns.MIN_QUOTED_LINES,
)
if not result:
if not unwrap_result:
return None

typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result
typ, top_range, hdrs, main_range, bottom_range, needs_unindent = (
unwrap_result
)

text_top = lines[slice(*top_range)] if top_range else ""
text = lines[slice(*main_range)] if main_range else ""
text_bottom = lines[slice(*bottom_range)] if bottom_range else ""
text_top_lines = lines[slice(*top_range)] if top_range else []
text_lines = lines[slice(*main_range)] if main_range else []
text_bottom_lines = lines[slice(*bottom_range)] if bottom_range else []

if needs_unindent:
text = _internal.unindent_lines(text)
text_lines = _internal.unindent_lines(text_lines)

result = {
"type": typ,
}

text = "\n".join(text).strip()
text_top = "\n".join(text_top).strip()
text_bottom = "\n".join(text_bottom).strip()
text = "\n".join(text_lines).strip()
text_top = "\n".join(text_top_lines).strip()
text_bottom = "\n".join(text_bottom_lines).strip()

if text:
result["text"] = text
Expand All @@ -117,7 +119,7 @@ def unwrap(text):
return result


def unwrap_html(html):
def unwrap_html(html: str) -> dict[str, str] | None:
"""
If the passed HTML is the HTML body of a forwarded message, a dictionary
with the following keys is returned:
Expand All @@ -137,38 +139,40 @@ def unwrap_html(html):

start_refs, end_refs, lines = _html.get_line_info(tree)

result = _internal.unwrap(lines, 1, _patterns.MIN_HEADER_LINES, 1)
unwrap_result = _internal.unwrap(lines, 1, _patterns.MIN_HEADER_LINES, 1)

if result:
typ, top_range, hdrs, main_range, bottom_range, needs_unindent = result
if unwrap_result:
typ, top_range, hdrs, main_range, bottom_range, needs_unindent = (
unwrap_result
)

result = {
"type": typ,
}

top_range = _html.trim_slice(lines, top_range)
main_range = _html.trim_slice(lines, main_range)
bottom_range = _html.trim_slice(lines, bottom_range)
top_range_slice = _html.trim_slice(lines, top_range)
main_range_slice = _html.trim_slice(lines, main_range)
bottom_range_slice = _html.trim_slice(lines, bottom_range)

if top_range:
if top_range_slice:
top_tree = _html.slice_tree(
tree, start_refs, end_refs, top_range, html_copy=html
tree, start_refs, end_refs, top_range_slice, html_copy=html
)
html_top = _html.render_html_tree(top_tree)
if html_top:
result["html_top"] = html_top

if bottom_range:
if bottom_range_slice:
bottom_tree = _html.slice_tree(
tree, start_refs, end_refs, bottom_range, html_copy=html
tree, start_refs, end_refs, bottom_range_slice, html_copy=html
)
html_bottom = _html.render_html_tree(bottom_tree)
if html_bottom:
result["html_bottom"] = html_bottom

if main_range:
if main_range_slice:
main_tree = _html.slice_tree(
tree, start_refs, end_refs, main_range
tree, start_refs, end_refs, main_range_slice
)
if needs_unindent:
_html.unindent_tree(main_tree)
Expand Down
70 changes: 50 additions & 20 deletions quotequail/_html.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# HTML utils
from collections.abc import Iterator

import lxml.etree
import lxml.html

from ._patterns import FORWARD_LINE, FORWARD_STYLES, MULTIPLE_WHITESPACE_RE
from .types import Element, ElementRef

INLINE_TAGS = [
"a",
Expand All @@ -27,7 +29,7 @@
END = "end"


def trim_tree_after(element, include_element=True):
def trim_tree_after(element: Element, include_element: bool = True):
"""
Remove the document tree following the given element. If include_element
is True, the given element is kept in the tree, otherwise it is removed.
Expand All @@ -44,7 +46,9 @@ def trim_tree_after(element, include_element=True):
el = parent_el


def trim_tree_before(element, include_element=True, keep_head=True):
def trim_tree_before(
element: Element, include_element: bool = True, keep_head: bool = True
) -> None:
"""
Remove the document tree preceding the given element. If include_element
is True, the given element is kept in the tree, otherwise it is removed.
Expand All @@ -66,7 +70,9 @@ def trim_tree_before(element, include_element=True, keep_head=True):
el = parent_el


def trim_slice(lines, slice_tuple):
def trim_slice(
lines: list[str], slice_tuple: tuple[int | None, int | None] | None
) -> tuple[int, int] | None:
"""
Trim a slice tuple (begin, end) so it starts at the first non-empty line
(obtained via indented_tree_line_generator / get_line_info) and ends at the
Expand Down Expand Up @@ -97,7 +103,7 @@ def _empty(line):
return (slice_start, slice_end)


def unindent_tree(element):
def unindent_tree(element: Element) -> None:
"""
Remove the outermost indent. For example, the tree
"<div>A<blockquote>B<div>C<blockquote>D</blockquote>E</div>F</blockquote>G</div>"
Expand All @@ -111,7 +117,13 @@ def unindent_tree(element):
return


def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
def slice_tree(
tree: Element,
start_refs: list[ElementRef | None],
end_refs: list[ElementRef | None],
slice_tuple: tuple[int | None, int | None] | None,
html_copy: str | None = None,
):
"""
Slice the HTML tree with the given start_refs and end_refs (obtained via
get_line_info) at the given slice_tuple, a tuple (start, end) containing
Expand Down Expand Up @@ -190,27 +202,27 @@ def slice_tree(tree, start_refs, end_refs, slice_tuple, html_copy=None):
return new_tree


def get_html_tree(html):
def get_html_tree(html: str) -> Element:
"""
Given the HTML string, returns an LXML tree object. The tree is wrapped in
<div> elements if it doesn't have a top level tag or parsing would
otherwise result in an error. The wrapping can be later removed with
strip_wrapping().
"""
parser = lxml.html.HTMLParser(encoding="utf-8")
html = html.encode("utf8")
htmlb = html.encode("utf8")

try:
tree = lxml.html.fromstring(html, parser=parser)
tree = lxml.html.fromstring(htmlb, parser=parser)
except lxml.etree.Error:
# E.g. empty document. Use dummy <div>
tree = lxml.html.fromstring("<div></div>")

# If the document doesn't start with a top level tag, wrap it with a <div>
# that will be later stripped out for consistent behavior.
if tree.tag not in lxml.html.defs.top_level_tags:
html = b"<div>" + html + b"</div>"
tree = lxml.html.fromstring(html, parser=parser)
htmlb = b"<div>" + htmlb + b"</div>"
tree = lxml.html.fromstring(htmlb, parser=parser)

# HACK for Outlook emails, where tags like <o:p> are rendered as <p>. We
# can generally ignore these tags so we replace them with <span>, which
Expand All @@ -229,7 +241,7 @@ def get_html_tree(html):
return tree


def strip_wrapping(html):
def strip_wrapping(html: str) -> str:
"""
Remove the wrapping that might have resulted when using get_html_tree().
"""
Expand All @@ -238,7 +250,7 @@ def strip_wrapping(html):
return html.strip()


def render_html_tree(tree):
def render_html_tree(tree: Element) -> str:
"""
Render the given HTML tree, and strip any wrapping that was applied in
get_html_tree().
Expand All @@ -257,13 +269,15 @@ def render_html_tree(tree):
return strip_wrapping(html)


def is_indentation_element(element):
def is_indentation_element(element: Element) -> bool:
if isinstance(element.tag, str):
return element.tag.lower() == "blockquote"
return False


def tree_token_generator(el, indentation_level=0):
def tree_token_generator(
el: Element, indentation_level: int = 0
) -> Iterator[None | tuple[Element, str, int] | str]:
"""
Yield tokens for the given HTML element as follows:
Expand Down Expand Up @@ -296,7 +310,13 @@ def tree_token_generator(el, indentation_level=0):
yield el.tail


def tree_line_generator(el, max_lines=None):
def tree_line_generator(
el: Element, max_lines: int | None = None
) -> Iterator[
tuple[
tuple[ElementRef, str] | None, tuple[ElementRef, str] | None, int, str
]
]:
"""
Iterate through an LXML tree and yield a tuple per line.
Expand Down Expand Up @@ -327,7 +347,7 @@ def tree_line_generator(el, max_lines=None):
- ((<Element blockquote>, 'end'), (<Element div>, 'end'), 0, 'world')
"""

def _trim_spaces(text):
def _trim_spaces(text: str) -> str:
return MULTIPLE_WHITESPACE_RE.sub(" ", text).strip()

counter = 1
Expand All @@ -341,7 +361,7 @@ def _trim_spaces(text):
start_ref = None

# The indentation level at the start of the line.
start_indentation_level = None
start_indentation_level = 0

for token in tree_token_generator(el):
if token is None:
Expand Down Expand Up @@ -393,12 +413,17 @@ def _trim_spaces(text):
else:
raise RuntimeError(f"invalid token: {token}")

"""
TODO: wrong type, would trigger error if reached.
line = _trim_spaces(line)
if line:
yield line
"""


def indented_tree_line_generator(el, max_lines=None):
def indented_tree_line_generator(
el: Element, max_lines: int | None = None
) -> Iterator[tuple[ElementRef | None, ElementRef | None, str]]:
r"""
Like tree_line_generator, but yields tuples (start_ref, end_ref, line),
where the line already takes the indentation into account by having "> "
Expand All @@ -413,14 +438,19 @@ def indented_tree_line_generator(el, max_lines=None):
yield start_ref, end_ref, "> " * indentation_level + full_line


def get_line_info(tree, max_lines=None):
def get_line_info(
tree: Element, max_lines: int | None = None
) -> tuple[list[ElementRef | None], list[ElementRef | None], list[str]]:
"""
Shortcut for indented_tree_line_generator() that returns an array of
start references, an array of corresponding end references (see
tree_line_generator() docs), and an array of corresponding lines.
"""
line_gen = indented_tree_line_generator(tree, max_lines=max_lines)
line_gen_result = list(zip(*line_gen))
line_gen_result: (
tuple[list[ElementRef | None], list[ElementRef | None], list[str]]
| tuple[()]
) = tuple(zip(*line_gen))
if line_gen_result:
return line_gen_result
return [], [], []
Loading

0 comments on commit 1a64acb

Please sign in to comment.