diff --git a/dev_requirements/requirements-linting.txt b/dev_requirements/requirements-linting.txt index 278cd5c8..fb03f883 100644 --- a/dev_requirements/requirements-linting.txt +++ b/dev_requirements/requirements-linting.txt @@ -5,7 +5,7 @@ # # pip-compile-multi # -astroid==3.1.0 +astroid==3.2.1 # via pylint dill==0.3.8 # via pylint @@ -15,7 +15,7 @@ mccabe==0.7.0 # via pylint platformdirs==4.2.0 # via pylint -pylint==3.1.1 +pylint==3.2.0 # via -r dev_requirements/requirements-linting.in tomlkit==0.12.5 # via pylint diff --git a/requirements.txt b/requirements.txt index 19defecf..6967830c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,7 +40,7 @@ pydantic-core==2.18.2 # via pydantic python-dateutil==2.9.0.post0 # via pandas -python-docx==1.1.0 +python-docx==1.1.2 # via kohlrahbi (pyproject.toml) pytz==2024.1 # via diff --git a/src/kohlrahbi/__init__.py b/src/kohlrahbi/__init__.py index 56786943..812a57dd 100644 --- a/src/kohlrahbi/__init__.py +++ b/src/kohlrahbi/__init__.py @@ -12,7 +12,7 @@ @click.group() @click.version_option(version=version) -def cli(): +def cli() -> None: """Kohlrahbi CLI tool""" diff --git a/src/kohlrahbi/ahb/__init__.py b/src/kohlrahbi/ahb/__init__.py index 4e8af948..771ce2f1 100644 --- a/src/kohlrahbi/ahb/__init__.py +++ b/src/kohlrahbi/ahb/__init__.py @@ -9,10 +9,10 @@ from typing import Any, Dict, Optional import click -import docx # type: ignore +import docx import tomlkit -from docx.document import Document # type:ignore[import] -from docx.table import Table # type:ignore[import] +from docx.document import Document +from docx.table import Table from maus.edifact import EdifactFormatVersion from kohlrahbi.ahbtable.ahbtable import AhbTable @@ -48,7 +48,7 @@ def process_ahb_table( pruefi: str, output_path: Path, file_type: str, -): +) -> None: """ Process the ahb table. """ @@ -98,7 +98,7 @@ def process_pruefi( path_to_ahb_docx_file: Path, output_path: Path, file_type: str, -): +) -> None: """ Process one pruefi. If the input path ends with .docx, we assume that the file containing the pruefi is given. @@ -183,7 +183,7 @@ def extract_pruefis_from_table(table: Table) -> list[str]: def table_header_contains_text_pruefidentifikator(table: Table) -> bool: """Checks if the table header contains the text 'Prüfidentifikator'.""" - return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator") + return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator") # type:ignore[no-any-return] def get_pruefi_to_file_mapping(basic_input_path: Path, format_version: EdifactFormatVersion) -> dict[str, str]: diff --git a/src/kohlrahbi/ahb/command.py b/src/kohlrahbi/ahb/command.py index d266cbe8..a068d38d 100644 --- a/src/kohlrahbi/ahb/command.py +++ b/src/kohlrahbi/ahb/command.py @@ -12,7 +12,7 @@ from kohlrahbi.enums.ahbexportfileformat import AhbExportFileFormat -def check_python_version(): +def check_python_version() -> None: """ Check if the Python interpreter is greater or equal to 3.11 """ @@ -24,7 +24,7 @@ def check_python_version(): # pylint: disable=unused-argument -def validate_path(ctx, param, value): +def validate_path(ctx, param, value) -> Path: # type:ignore[no-untyped-def] """ Ensure the path exists or offer to create it. """ @@ -97,7 +97,7 @@ def ahb( format_version: EdifactFormatVersion | str, assume_yes: bool, # pylint: disable=unused-argument # it is used by the callback function of the output-path -): +) -> None: """ Scrape AHB documents for pruefidentifikatoren. This is a command line interface for the pruefis module. diff --git a/src/kohlrahbi/ahbtable/ahbcondtions.py b/src/kohlrahbi/ahbtable/ahbcondtions.py index 08d049aa..f2296e72 100644 --- a/src/kohlrahbi/ahbtable/ahbcondtions.py +++ b/src/kohlrahbi/ahbtable/ahbcondtions.py @@ -4,7 +4,7 @@ import re from pathlib import Path -from docx.table import Table as DocxTable # type: ignore[import-untyped] +from docx.table import Table as DocxTable from maus.edifact import EdifactFormat from pydantic import BaseModel, ConfigDict @@ -53,10 +53,11 @@ def collect_conditions( logger.info("The package conditions for %s were collected.", edifact_format) return conditions_dict - def include_condition_dict(self, to_add=dict[EdifactFormat, dict[str, str]] | None) -> None: + def include_condition_dict(self, to_add: dict[EdifactFormat, dict[str, str]] | None) -> None: """ " Include a dict of conditions to the conditions_dict""" if to_add is None: logger.info("Conditions dict to be added is empty.") + return for edifact_format, edi_cond_dict in to_add.items(): for condition_key, condition_text in edi_cond_dict.items(): if edifact_format in self.conditions_dict: @@ -117,8 +118,7 @@ def parse_conditions_from_string( # check whether condition was already collected: existing_text = conditions_dict[edifact_format].get(match[0]) is_condition_key_collected_yet = existing_text is not None - if is_condition_key_collected_yet and existing_text is not None: - key_exits_but_shorter_text = len(text) > len(existing_text) + key_exits_but_shorter_text = existing_text is not None and len(text) > len(existing_text) if not is_condition_key_collected_yet or key_exits_but_shorter_text: conditions_dict[edifact_format][match[0]] = text return conditions_dict diff --git a/src/kohlrahbi/ahbtable/ahbpackagetable.py b/src/kohlrahbi/ahbtable/ahbpackagetable.py index 7e40e1fc..22601bc1 100644 --- a/src/kohlrahbi/ahbtable/ahbpackagetable.py +++ b/src/kohlrahbi/ahbtable/ahbpackagetable.py @@ -7,7 +7,7 @@ class which contains AHB package condition table from pathlib import Path import pandas as pd -from docx.table import Table as DocxTable # type: ignore[import-untyped] +from docx.table import Table as DocxTable from maus.edifact import EdifactFormat from pydantic import BaseModel, ConfigDict @@ -52,7 +52,7 @@ def provide_conditions(self, edifact_format: EdifactFormat) -> dict[EdifactForma logger.info("The package conditions for %s were collected.", edifact_format) return conditions_dict - def provide_packages(self, edifact_format: EdifactFormat): + def provide_packages(self, edifact_format: EdifactFormat) -> None: """collect conditions from package table and store them in conditions dict.""" package_dict: dict[EdifactFormat, dict[str, str]] = {edifact_format: {}} @@ -71,20 +71,20 @@ def provide_packages(self, edifact_format: EdifactFormat): # check whether package was already collected: existing_text = package_dict[edifact_format].get(package) is_package_key_collected_yet = existing_text is not None - if is_package_key_collected_yet: - key_exits_but_shorter_text = len(package_conditions) > len( - existing_text # type: ignore[arg-type] - ) # type: ignore[arg-type] + key_exits_but_shorter_text = existing_text is not None and len(package_conditions) > len( + existing_text + ) if not is_package_key_collected_yet or key_exits_but_shorter_text: package_dict[edifact_format][package] = package_conditions logger.info("Packages for %s were collected.", edifact_format) self.package_dict = package_dict - def include_package_dict(self, to_add=dict[EdifactFormat, dict[str, str]] | None) -> None: + def include_package_dict(self, to_add: dict[EdifactFormat, dict[str, str]] | None) -> None: """Include a dict of conditions to the conditions_dict""" if to_add is None: logger.info("Packages dict to be added is empty.") + return for edifact_format, edi_cond_dict in to_add.items(): for package_key, package_conditions in edi_cond_dict.items(): if edifact_format in self.package_dict: diff --git a/src/kohlrahbi/ahbtable/ahbsubtable.py b/src/kohlrahbi/ahbtable/ahbsubtable.py index d232ab85..18f1bc15 100644 --- a/src/kohlrahbi/ahbtable/ahbsubtable.py +++ b/src/kohlrahbi/ahbtable/ahbsubtable.py @@ -5,12 +5,13 @@ from typing import Generator import pandas as pd -from docx.table import Table as DocxTable # type:ignore[import] -from docx.table import _Cell # type:ignore[import] +from docx.table import Table as DocxTable +from docx.table import _Cell, _Row from pydantic import BaseModel, ConfigDict from kohlrahbi.ahbtable.ahbtablerow import AhbTableRow -from kohlrahbi.row_type_checker import RowType, get_row_type +from kohlrahbi.enums import RowType +from kohlrahbi.row_type_checker import get_row_type from kohlrahbi.seed import Seed @@ -122,7 +123,7 @@ def from_headless_table(cls, tmd: Seed, docx_table: DocxTable) -> "AhbSubTable": return cls(table_meta_data=tmd, table=ahb_table_dataframe) @staticmethod - def iter_visible_cells(row) -> Generator[_Cell, None, None]: + def iter_visible_cells(row: _Row) -> Generator[_Cell, None, None]: """ This function makes sure that you will iterate over the cells you see in the word document. For more information go to https://github.com/python-openxml/python-docx/issues/970#issuecomment-877386927 diff --git a/src/kohlrahbi/ahbtable/ahbtable.py b/src/kohlrahbi/ahbtable/ahbtable.py index 6a299ca4..52cbb524 100644 --- a/src/kohlrahbi/ahbtable/ahbtable.py +++ b/src/kohlrahbi/ahbtable/ahbtable.py @@ -91,7 +91,7 @@ def append_ahb_sub_table(self, ahb_sub_table: AhbSubTable) -> None: self.table = pd.concat([self.table, ahb_sub_table.table], ignore_index=True) @staticmethod - def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool: + def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool: # type:ignore[type-arg] """ Returns true if the given raw line only contains some meaningful data in the "Segment Gruppe" key """ @@ -112,6 +112,7 @@ def sanitize(self) -> None: iterable_ahb_table = peekable(self.table.iterrows()) self.table.reset_index(drop=True, inplace=True) for _, row in iterable_ahb_table: + # pylint: disable=unpacking-non-sequence # it is a tuple indeed index_of_next_row, next_row = iterable_ahb_table.peek( ( 0, diff --git a/src/kohlrahbi/ahbtable/ahbtablerow.py b/src/kohlrahbi/ahbtable/ahbtablerow.py index bb48404d..2a6073ac 100644 --- a/src/kohlrahbi/ahbtable/ahbtablerow.py +++ b/src/kohlrahbi/ahbtable/ahbtablerow.py @@ -5,11 +5,11 @@ from typing import Optional import pandas as pd -from docx.table import _Cell # type:ignore[import] +from docx.table import _Cell from pydantic import BaseModel, ConfigDict from kohlrahbi.docxtablecells import BedingungCell, BodyCell, EdifactStrukturCell -from kohlrahbi.row_type_checker import RowType +from kohlrahbi.enums import RowType from kohlrahbi.seed import Seed @@ -46,7 +46,9 @@ def parse( dtype="str", ) # pylint: disable=unsubscriptable-object, no-member - empty_row: pd.Series[str] = pd.Series(len(ahb_row_dataframe.columns) * [""], index=self.seed.column_headers) + empty_row: pd.Series = pd.Series( # type:ignore[type-arg] + len(ahb_row_dataframe.columns) * [""], index=self.seed.column_headers + ) ahb_row_dataframe = pd.concat([ahb_row_dataframe, empty_row.to_frame().T], ignore_index=True) diff --git a/src/kohlrahbi/changehistory/__init__.py b/src/kohlrahbi/changehistory/__init__.py index 4733fbed..e3633fe3 100644 --- a/src/kohlrahbi/changehistory/__init__.py +++ b/src/kohlrahbi/changehistory/__init__.py @@ -13,10 +13,10 @@ from pathlib import Path from typing import Optional -import docx # type: ignore +import docx import pandas as pd -from docx.document import Document # type:ignore[import] -from docx.table import Table # type: ignore +from docx.document import Document +from docx.table import Table from kohlrahbi.changehistory.changehistorytable import ChangeHistoryTable from kohlrahbi.docxfilefinder import DocxFileFinder diff --git a/src/kohlrahbi/changehistory/changehistorytable.py b/src/kohlrahbi/changehistory/changehistorytable.py index c21d9fce..73059b61 100644 --- a/src/kohlrahbi/changehistory/changehistorytable.py +++ b/src/kohlrahbi/changehistory/changehistorytable.py @@ -3,7 +3,7 @@ """ import pandas as pd -from docx.table import Table # type:ignore[import] +from docx.table import Table from pydantic import BaseModel, ConfigDict from kohlrahbi.ahbtable.ahbsubtable import AhbSubTable @@ -54,7 +54,7 @@ def is_empty(val: str) -> bool: return pd.isna(val) or val == "" # Define a function to check if a value is considered empty for our case - def is_the_first_column_empty(row: pd.Series) -> bool: + def is_the_first_column_empty(row: pd.Series) -> bool: # type:ignore[type-arg] """ Checks if the first column of the given row is empty. This is our indicator if the current row is a continuation of the upper row. diff --git a/src/kohlrahbi/changehistory/command.py b/src/kohlrahbi/changehistory/command.py index ed9c63a5..81543ef5 100644 --- a/src/kohlrahbi/changehistory/command.py +++ b/src/kohlrahbi/changehistory/command.py @@ -47,7 +47,7 @@ def changehistory( format_version: EdifactFormatVersion | str, assume_yes: bool, # pylint: disable=unused-argument # it is used by the callback function of the output-path -): +) -> None: """ Scrape change histories from the input path and save them to the output path. diff --git a/src/kohlrahbi/conditions/__init__.py b/src/kohlrahbi/conditions/__init__.py index 7416b144..2054b75c 100644 --- a/src/kohlrahbi/conditions/__init__.py +++ b/src/kohlrahbi/conditions/__init__.py @@ -4,7 +4,7 @@ from pathlib import Path -import docx # type: ignore[import-untyped] +import docx from maus.edifact import EdifactFormat, EdifactFormatVersion, get_format_of_pruefidentifikator from kohlrahbi.ahb import get_pruefi_to_file_mapping @@ -45,8 +45,8 @@ def scrape_conditions( for edifact_format, files in all_format_files.items(): for file in files: # pylint: disable=too-many-function-args - # type: ignore[call-arg, arg-type] - doc = docx.Document(basic_input_path / path_to_file / Path(file)) + path: Path = basic_input_path / path_to_file / Path(file) + doc = docx.Document(str(path.absolute())) logger.info("Start scraping conditions for %s in %s", edifact_format, file) if not doc: logger.error("Could not open file %s as docx", Path(file)) diff --git a/src/kohlrahbi/conditions/command.py b/src/kohlrahbi/conditions/command.py index 9a33a5c9..cc406bd5 100644 --- a/src/kohlrahbi/conditions/command.py +++ b/src/kohlrahbi/conditions/command.py @@ -28,7 +28,7 @@ from kohlrahbi.conditions import scrape_conditions -def check_python_version(): +def check_python_version() -> None: """ Check if the Python interpreter is greater or equal to 3.11 """ @@ -40,7 +40,7 @@ def check_python_version(): # pylint: disable=unused-argument -def validate_path(ctx, param, value): +def validate_path(ctx, param, value) -> Path: # type:ignore[no-untyped-def] """ Ensure the path exists or offer to create it. """ @@ -91,7 +91,7 @@ def validate_path(ctx, param, value): ) def conditions( edi_energy_mirror_path: Path, output_path: Path, format_version: EdifactFormatVersion | str, assume_yes: bool -): +) -> None: """ Scrape AHB documents for conditions. """ diff --git a/src/kohlrahbi/docxfilefinder.py b/src/kohlrahbi/docxfilefinder.py index 4207b414..f1ca3b86 100644 --- a/src/kohlrahbi/docxfilefinder.py +++ b/src/kohlrahbi/docxfilefinder.py @@ -138,7 +138,7 @@ def filter_docx_files_for_edifact_format(self, edifact_format: EdifactFormat) -> self.paths_to_docx_files = [path for path in self.paths_to_docx_files if str(edifact_format) in path.name] - def remove_temporary_files(self): + def remove_temporary_files(self) -> None: """ This method removes all temporary files from paths_to_docx_files. Temporary files lead to the exception `BadZipFile: File is not a zip file`. diff --git a/src/kohlrahbi/docxtablecells/__init__.py b/src/kohlrahbi/docxtablecells/__init__.py index 4f840e00..4e897f7e 100644 --- a/src/kohlrahbi/docxtablecells/__init__.py +++ b/src/kohlrahbi/docxtablecells/__init__.py @@ -5,3 +5,5 @@ from .bedinungscell import BedingungCell from .bodycell import BodyCell from .edifactstrukturcell import EdifactStrukturCell + +__all__ = ["BedingungCell", "BodyCell", "EdifactStrukturCell"] diff --git a/src/kohlrahbi/docxtablecells/bedinungscell.py b/src/kohlrahbi/docxtablecells/bedinungscell.py index 6720761a..8b77f272 100644 --- a/src/kohlrahbi/docxtablecells/bedinungscell.py +++ b/src/kohlrahbi/docxtablecells/bedinungscell.py @@ -5,7 +5,7 @@ import re import pandas as pd -from docx.table import _Cell # type:ignore[import] +from docx.table import _Cell from pydantic import BaseModel, ConfigDict diff --git a/src/kohlrahbi/docxtablecells/bodycell.py b/src/kohlrahbi/docxtablecells/bodycell.py index 1a82a08c..9749d5dd 100644 --- a/src/kohlrahbi/docxtablecells/bodycell.py +++ b/src/kohlrahbi/docxtablecells/bodycell.py @@ -3,7 +3,8 @@ """ import pandas as pd -from docx.table import _Cell # type:ignore[import] +from docx.table import _Cell +from docx.text.paragraph import Paragraph from maus.reader.flat_ahb_reader import FlatAhbCsvReader from pydantic import BaseModel, ConfigDict @@ -103,9 +104,9 @@ def parse(self, ahb_row_dataframe: pd.DataFrame) -> pd.DataFrame: return ahb_row_dataframe - def has_paragraph_tabstops(self, paragraph) -> bool: + def has_paragraph_tabstops(self, paragraph: Paragraph) -> bool: """ Checks if the given paragraph contains tabstops """ tab_stops = list(paragraph.paragraph_format.tab_stops) - return len(tab_stops) > 0 + return any(tab_stops) diff --git a/src/kohlrahbi/docxtablecells/edifactstrukturcell.py b/src/kohlrahbi/docxtablecells/edifactstrukturcell.py index 0907ed53..e6a7eef2 100644 --- a/src/kohlrahbi/docxtablecells/edifactstrukturcell.py +++ b/src/kohlrahbi/docxtablecells/edifactstrukturcell.py @@ -5,7 +5,7 @@ import re import pandas as pd -from docx.table import _Cell # type:ignore[import] +from docx.table import _Cell from pydantic import BaseModel, ConfigDict _segment_group_pattern = re.compile(r"^SG\d+$") diff --git a/src/kohlrahbi/enums/__init__.py b/src/kohlrahbi/enums/__init__.py index a6ba61bb..4bf0a114 100644 --- a/src/kohlrahbi/enums/__init__.py +++ b/src/kohlrahbi/enums/__init__.py @@ -4,3 +4,5 @@ from .row_type import RowType from .row_type_color import RowTypeColor + +__all__ = ["RowType", "RowTypeColor"] diff --git a/src/kohlrahbi/read_functions.py b/src/kohlrahbi/read_functions.py index 751e8429..419155f2 100644 --- a/src/kohlrahbi/read_functions.py +++ b/src/kohlrahbi/read_functions.py @@ -2,13 +2,13 @@ A collection of functions to get information from AHB tables. """ -from typing import Generator, Optional, Tuple, Union +from typing import Generator, Optional, Tuple, TypeGuard, Union -from docx.document import Document # type:ignore[import] -from docx.oxml.table import CT_Tbl # type:ignore[import] -from docx.oxml.text.paragraph import CT_P # type:ignore[import] -from docx.table import Table, _Cell # type:ignore[import] -from docx.text.paragraph import Paragraph # type:ignore[import] +from docx.document import Document +from docx.oxml.table import CT_Tbl +from docx.oxml.text.paragraph import CT_P +from docx.table import Table, _Cell +from docx.text.paragraph import Paragraph from maus.edifact import EdifactFormat, get_format_of_pruefidentifikator from kohlrahbi.ahbtable.ahbcondtions import AhbConditions @@ -48,7 +48,7 @@ def table_header_starts_with_text_edifact_struktur(table: Table) -> bool: return table.cell(row_idx=0, col_idx=0).text.strip() == "EDIFACT Struktur" -def is_item_header_of_change_history_section(item: Paragraph | Table | None, style_name: str) -> bool: +def is_item_header_of_change_history_section(item: Paragraph | Table | None, style_name: str) -> TypeGuard[Paragraph]: """ Checks if the given item is a header of the change history section. """ @@ -57,14 +57,14 @@ def is_item_header_of_change_history_section(item: Paragraph | Table | None, sty return isinstance(item, Paragraph) and "Änderungshistorie" in item.text and "Heading" in style_name -def is_item_text_paragraph(item: Paragraph | Table | None, style_name: str) -> bool: +def is_item_text_paragraph(item: Paragraph | Table | None, style_name: str) -> TypeGuard[Paragraph]: """ Checks if the given item is a text paragraph. """ return isinstance(item, Paragraph) and "Heading" not in style_name -def is_item_table_with_pruefidentifikatoren(item: Paragraph | Table | None) -> bool: +def is_item_table_with_pruefidentifikatoren(item: Paragraph | Table | None) -> TypeGuard[Table]: """ Check if the item is a Table and contains Pruefidentifikatoren. @@ -78,10 +78,8 @@ def is_item_table_with_pruefidentifikatoren(item: Paragraph | Table | None) -> b def is_item_headless_table( - item: Paragraph | Table | None, - # seed: Seed | None, - ahb_table: AhbTable | None, -) -> bool: + value: tuple[Union[Paragraph, Table, None], Union[AhbTable, None]] +) -> TypeGuard[tuple[Table, AhbTable]]: """ Checks if the given item is a headless table. @@ -93,11 +91,12 @@ def is_item_headless_table( Returns: bool: True if the item is a headless table, False otherwise. """ + item, ahb_table = value # return isinstance(item, Table) and seed is not None and ahb_table is not None return isinstance(item, Table) and ahb_table is not None -def get_ahb_table(document, pruefi: str) -> Optional[AhbTable]: +def get_ahb_table(document: Document, pruefi: str) -> Optional[AhbTable]: """ Reads a docx file and extracts all information for a given Prüfidentifikator. If the Prüfidentifikator is not found or we reach the end of the AHB document @@ -111,8 +110,8 @@ def get_ahb_table(document, pruefi: str) -> Optional[AhbTable]: AhbTable or None: The extracted AHB table or None if not found """ - ahb_table = None - seed = None + ahb_table: AhbTable | None = None + seed: Seed | None = None searched_pruefi_is_found = False for item in get_all_paragraphs_and_tables(document): @@ -133,7 +132,7 @@ def get_ahb_table(document, pruefi: str) -> Optional[AhbTable]: searched_pruefi_is_found, ahb_table = process_table(item, pruefi, searched_pruefi_is_found, ahb_table, seed) - if ahb_table: + if ahb_table is not None: ahb_table.sanitize() return ahb_table @@ -141,29 +140,37 @@ def get_ahb_table(document, pruefi: str) -> Optional[AhbTable]: return None -def get_style_name(item) -> str: +def get_style_name(item: Paragraph | Table) -> str: """Extracts and normalizes the style name of a document item.""" - return item.style.name if item.style else "None" + return item.style.name if item.style else "None" # type:ignore[no-any-return] -def reached_end_of_document(style_name, item) -> bool: +def reached_end_of_document(style_name: str, item: Paragraph | Table | None) -> bool: """Checks if the current item marks the end of the document.""" return is_item_header_of_change_history_section(item, style_name) -def update_seed(item, seed): +def update_seed(item: Paragraph | Table | None, seed: Seed | None) -> Seed | None: """Updates the seed if the current item is a table with Prüfidentifikatoren.""" if is_item_table_with_pruefidentifikatoren(item): return Seed.from_table(docx_table=item) return seed -def should_end_search(pruefi, seed, searched_pruefi_is_found): +def should_end_search(pruefi: str, seed: Seed | None, searched_pruefi_is_found: bool) -> bool: """Determines if the search for the AHB table should end.""" - return seed and pruefi not in seed.pruefidentifikatoren and searched_pruefi_is_found + if seed is None: + return False + return pruefi not in seed.pruefidentifikatoren and searched_pruefi_is_found -def process_table(item, pruefi, searched_pruefi_is_found, ahb_table, seed=None): +def process_table( + item: Paragraph | Table | None, + pruefi: str, + searched_pruefi_is_found: bool, + ahb_table: AhbTable | None, + seed: Seed | None = None, +) -> tuple[bool, AhbTable]: """Processes tables to find and build the AHB table.""" if is_item_table_with_pruefidentifikatoren(item): seed = Seed.from_table(docx_table=item) @@ -174,37 +181,39 @@ def process_table(item, pruefi, searched_pruefi_is_found, ahb_table, seed=None): ahb_table = AhbTable.from_ahb_sub_table(ahb_sub_table=ahb_sub_table) searched_pruefi_is_found = True - # elif is_item_headless_table(item, seed, ahb_table): - elif is_item_headless_table(item, ahb_table): + elif is_item_headless_table((item, ahb_table)): + assert ahb_table is not None + assert isinstance(item, Table) + assert seed is not None ahb_sub_table = AhbSubTable.from_headless_table(docx_table=item, tmd=seed) ahb_table.append_ahb_sub_table(ahb_sub_table=ahb_sub_table) - - return searched_pruefi_is_found, ahb_table + # actually, the ahb_table is none here (see test_kohlrahbi_cli_with_valid_arguments) + return searched_pruefi_is_found, ahb_table # type:ignore[return-value] # Logging functions -def log_end_of_document(pruefi): +def log_end_of_document(pruefi: str) -> None: """ Logs that the end of the document was reached before finding the table for a given Prüfi. """ logger.info("Reached the end of the document before finding the table for Prüfi '%s'.", pruefi) -def log_end_of_ahb_table(pruefi): +def log_end_of_ahb_table(pruefi: str) -> None: """ Logs that the end of the AHB table was reached for a given Prüfi. """ logger.info("Reached the end of the AHB table for Prüfi '%s'.", pruefi) -def log_found_pruefi(pruefi): +def log_found_pruefi(pruefi: str) -> None: """ Logs that the AHB table for a given Prüfi was found. """ logger.info("Found the AHB table for Prüfi '%s'.", pruefi) -def log_pruefi_not_found(pruefi): +def log_pruefi_not_found(pruefi: str) -> None: """ Logs that the Prüfi was not found in the provided document. """ @@ -275,7 +284,9 @@ def is_last_row_unt_0062(item: Table | Paragraph) -> bool: return isinstance(item, Table) and "UNT\t0062" == item.cell(row_idx=-1, col_idx=0).text.strip() -def is_relevant_pruefi_table(item: Paragraph | Table, seed: Seed, edifact_format) -> bool: +def is_relevant_pruefi_table( + item: Paragraph | Table, seed: Seed | None, edifact_format: EdifactFormat +) -> TypeGuard[Table]: """compares new pruefis to last pruefi and thus checks whether new table""" return ( isinstance(item, Table) diff --git a/src/kohlrahbi/row_type_checker.py b/src/kohlrahbi/row_type_checker.py index ef817dd9..86b9b4b4 100644 --- a/src/kohlrahbi/row_type_checker.py +++ b/src/kohlrahbi/row_type_checker.py @@ -2,15 +2,15 @@ This module contains all functions to define the type of a row of the tables in an AHB. """ -from docx.oxml.ns import qn # type:ignore[import] -from docx.oxml.parser import OxmlElement # type:ignore[import] -from docx.shared import RGBColor # type:ignore[import] -from docx.table import _Cell # type:ignore[import] +from docx.oxml.ns import qn +from docx.oxml.parser import OxmlElement +from docx.shared import RGBColor +from docx.table import _Cell from kohlrahbi.enums import RowType -def set_table_header_bg_color(cell, hex_color: str): +def set_table_header_bg_color(cell: _Cell, hex_color: str) -> _Cell: """ set background shading for Header Rows """ @@ -48,7 +48,8 @@ def is_row_segmentname(edifact_struktur_cell: _Cell) -> bool: bool: """ try: - return edifact_struktur_cell.paragraphs[0].runs[0].font.color.rgb == RGBColor(128, 128, 128) # grey + colour_is_grey: bool = edifact_struktur_cell.paragraphs[0].runs[0].font.color.rgb == RGBColor(128, 128, 128) + return colour_is_grey except IndexError: return False diff --git a/src/kohlrahbi/seed.py b/src/kohlrahbi/seed.py index 0a25c323..b051bc2b 100644 --- a/src/kohlrahbi/seed.py +++ b/src/kohlrahbi/seed.py @@ -2,7 +2,7 @@ This module provides a class to collect information which of need for all parsing functions """ -from docx.table import Table # type:ignore[import] +from docx.table import Table from pydantic import BaseModel from kohlrahbi.enums import RowType @@ -56,7 +56,7 @@ def from_table(cls, docx_table: Table) -> "Seed": # metadata metadata = table_header.pruefi_meta_data - base_column_names: list = [ + base_column_names: list[str] = [ "Segment Gruppe", "Segment", "Datenelement", diff --git a/src/kohlrahbi/table_header.py b/src/kohlrahbi/table_header.py index 358a644b..892a7cf4 100644 --- a/src/kohlrahbi/table_header.py +++ b/src/kohlrahbi/table_header.py @@ -5,8 +5,8 @@ from enum import StrEnum from typing import Dict, List, Mapping, cast -from docx.table import _Cell # type:ignore[import] -from docx.text.paragraph import Paragraph # type:ignore[import] +from docx.table import _Cell +from docx.text.paragraph import Paragraph from more_itertools import first, last from pydantic import BaseModel @@ -157,7 +157,7 @@ def from_header_cell(cls, row_cell: _Cell) -> "TableHeader": return cls(pruefi_meta_data=pruefi_meta_data) @staticmethod - def initialize_collector(paragraph) -> Dict[str, Dict[str, str | int]]: + def initialize_collector(paragraph: Paragraph) -> Dict[str, Dict[str, str | int]]: """Initialize the collector""" current_tabstop_positions = get_tabstop_positions(paragraph=paragraph) splitted_text = paragraph.text.split("\t") diff --git a/src/kohlrahbi/unfoldedahb/__init__.py b/src/kohlrahbi/unfoldedahb/__init__.py index 6fc1c8b6..4a444599 100644 --- a/src/kohlrahbi/unfoldedahb/__init__.py +++ b/src/kohlrahbi/unfoldedahb/__init__.py @@ -5,3 +5,5 @@ from .unfoldedahbline import UnfoldedAhbLine from .unfoldedahbtable import UnfoldedAhb from .unfoldedahbtablemetadata import UnfoldedAhbTableMetaData + +__all__ = ["UnfoldedAhb", "UnfoldedAhbLine", "UnfoldedAhbTableMetaData"] diff --git a/src/kohlrahbi/unfoldedahb/unfoldedahbtable.py b/src/kohlrahbi/unfoldedahb/unfoldedahbtable.py index a1039c6b..d5881146 100644 --- a/src/kohlrahbi/unfoldedahb/unfoldedahbtable.py +++ b/src/kohlrahbi/unfoldedahb/unfoldedahbtable.py @@ -71,7 +71,7 @@ class UnfoldedAhb(BaseModel): unfolded_ahb_lines: list[UnfoldedAhbLine] @classmethod - def from_ahb_table(cls, ahb_table: AhbTable, pruefi: str): + def from_ahb_table(cls, ahb_table: AhbTable, pruefi: str) -> "UnfoldedAhb": """ This function creates an UnfoldedAhb from an AhbTable. """ @@ -89,7 +89,7 @@ def from_ahb_table(cls, ahb_table: AhbTable, pruefi: str): ) if UnfoldedAhb._is_section_name(ahb_row=row): - _, next_row = iterable_ahb_table.peek() + _, next_row = iterable_ahb_table.peek() # pylint: disable=unpacking-non-sequence # it is a tuple indeed ahb_expression = next_row[pruefi] if _segment_group_pattern.match(next_row["Segment Gruppe"]): @@ -236,7 +236,7 @@ def _get_section_name(segment_gruppe_or_section_name: str, last_section_name: st return last_section_name @staticmethod - def _is_section_name(ahb_row: pd.Series) -> bool: + def _is_section_name(ahb_row: pd.Series) -> bool: # type:ignore[type-arg] """ Checks if the current AHB row is a section name. It uses the same logic as the function 'line_contains_only_segment_gruppe' @@ -245,7 +245,7 @@ def _is_section_name(ahb_row: pd.Series) -> bool: return AhbTable.line_contains_only_segment_gruppe(ahb_row) @staticmethod - def _is_segment_group(ahb_row: pd.Series) -> bool: + def _is_segment_group(ahb_row: pd.Series) -> bool: # type:ignore[type-arg] """Checks if the current AHB row is a segment group.""" if _segment_group_pattern.match(ahb_row["Segment Gruppe"]) and not ahb_row["Segment"]: @@ -253,7 +253,7 @@ def _is_segment_group(ahb_row: pd.Series) -> bool: return False @staticmethod - def _is_segment_opening_line(ahb_row: pd.Series) -> bool: + def _is_segment_opening_line(ahb_row: pd.Series) -> bool: # type:ignore[type-arg] """Checks if the current AHB row is a segment opening line. Example: @@ -273,7 +273,7 @@ def _is_segment_opening_line(ahb_row: pd.Series) -> bool: return False @staticmethod - def _is_just_segment(ahb_row: pd.Series) -> bool: + def _is_just_segment(ahb_row: pd.Series) -> bool: # type:ignore[type-arg] """ Checks if the given AHB row is a segment """ @@ -287,7 +287,7 @@ def _is_just_segment(ahb_row: pd.Series) -> bool: return False @staticmethod - def _is_dataelement(ahb_row: pd.Series) -> bool: + def _is_dataelement(ahb_row: pd.Series) -> bool: # type:ignore[type-arg] """ Checks if the given AHB row is a dataelement """ @@ -296,7 +296,7 @@ def _is_dataelement(ahb_row: pd.Series) -> bool: return False @staticmethod - def _is_just_value_pool_entry(ahb_row: pd.Series) -> bool: + def _is_just_value_pool_entry(ahb_row: pd.Series) -> bool: # type:ignore[type-arg] """ Checks if the given AHB row contains only a value pool entry (w/o Segment (group) and data element) """ diff --git a/tox.ini b/tox.ini index 3b851720..5003ec98 100644 --- a/tox.ini +++ b/tox.ini @@ -36,7 +36,7 @@ deps = -rrequirements.txt -r dev_requirements/requirements-type_check.txt commands = - mypy --show-error-codes src/kohlrahbi + mypy --show-error-codes src/kohlrahbi --strict mypy --show-error-codes unittests # mypy --show-error-codes unittests # does not work yet, sadly; Some tox/packaging problems # add single files (ending with .py) or packages here diff --git a/unittests/cellparagraph.py b/unittests/cellparagraph.py index 14376d5c..76be0b50 100644 --- a/unittests/cellparagraph.py +++ b/unittests/cellparagraph.py @@ -1,7 +1,7 @@ from typing import Optional from attr import define, field -from docx.shared import Length # type:ignore[import] +from docx.shared import Length @define(auto_attribs=True, kw_only=True) diff --git a/unittests/conftest.py b/unittests/conftest.py index 9d120388..a30fbcd7 100644 --- a/unittests/conftest.py +++ b/unittests/conftest.py @@ -1,5 +1,5 @@ -import docx # type:ignore[import] -import pytest # type:ignore[import] +import docx +import pytest from unittests.cellparagraph import CellParagraph diff --git a/unittests/test_ahb_file_finder.py b/unittests/test_ahb_file_finder.py index 0fc657e9..3f1f89af 100644 --- a/unittests/test_ahb_file_finder.py +++ b/unittests/test_ahb_file_finder.py @@ -1,6 +1,6 @@ from pathlib import Path -import pytest # type:ignore[import] +import pytest from maus.edifact import EdifactFormat from kohlrahbi.docxfilefinder import DocxFileFinder diff --git a/unittests/test_ahb_sub_table.py b/unittests/test_ahb_sub_table.py index e5586f98..2e703f43 100644 --- a/unittests/test_ahb_sub_table.py +++ b/unittests/test_ahb_sub_table.py @@ -4,8 +4,8 @@ from pathlib import Path -import docx # type:ignore[import] -from docx.table import Table # type:ignore[import] +import docx +from docx.table import Table from kohlrahbi.ahbtable.ahbsubtable import AhbSubTable from kohlrahbi.read_functions import get_all_paragraphs_and_tables diff --git a/unittests/test_ahb_table.py b/unittests/test_ahb_table.py index 6f8384ab..80066b83 100644 --- a/unittests/test_ahb_table.py +++ b/unittests/test_ahb_table.py @@ -1,7 +1,7 @@ from pathlib import Path import pandas as pd -import pytest # type:ignore[import] +import pytest from kohlrahbi.ahbtable.ahbtable import AhbTable from kohlrahbi.unfoldedahb import UnfoldedAhb diff --git a/unittests/test_cells/test_bedingung_cell_parser.py b/unittests/test_cells/test_bedingung_cell_parser.py index c3a33588..77f3d424 100644 --- a/unittests/test_cells/test_bedingung_cell_parser.py +++ b/unittests/test_cells/test_bedingung_cell_parser.py @@ -1,6 +1,6 @@ import pandas as pd -import pytest # type:ignore[import] -from docx.shared import Twips # type:ignore[import] +import pytest +from docx.shared import Twips from kohlrahbi.docxtablecells import BedingungCell from unittests.cellparagraph import CellParagraph diff --git a/unittests/test_cells/test_body_cell_parser.py b/unittests/test_cells/test_body_cell_parser.py index b7bd78e4..3e52de75 100644 --- a/unittests/test_cells/test_body_cell_parser.py +++ b/unittests/test_cells/test_body_cell_parser.py @@ -1,6 +1,6 @@ import pandas as pd -import pytest # type:ignore[import] -from docx.shared import Length, Twips # type:ignore[import] +import pytest +from docx.shared import Length, Twips from kohlrahbi.docxtablecells import BodyCell from unittests.cellparagraph import CellParagraph diff --git a/unittests/test_cells/test_edifact_struktur_cell_parser.py b/unittests/test_cells/test_edifact_struktur_cell_parser.py index 3db3e0cd..d3aca3bd 100644 --- a/unittests/test_cells/test_edifact_struktur_cell_parser.py +++ b/unittests/test_cells/test_edifact_struktur_cell_parser.py @@ -1,6 +1,6 @@ import pandas as pd -import pytest # type:ignore[import] -from docx.shared import Twips # type:ignore[import] +import pytest +from docx.shared import Twips from kohlrahbi.docxtablecells import EdifactStrukturCell from unittests.cellparagraph import CellParagraph diff --git a/unittests/test_check_row_type.py b/unittests/test_check_row_type.py index c47b4817..3ec82527 100644 --- a/unittests/test_check_row_type.py +++ b/unittests/test_check_row_type.py @@ -1,6 +1,6 @@ -import docx # type:ignore[import] -import pytest # type:ignore[import] -from docx.shared import RGBColor # type:ignore[import] +import docx +import pytest +from docx.shared import RGBColor from kohlrahbi.row_type_checker import RowType, get_row_type diff --git a/unittests/test_cli_pruefi.py b/unittests/test_cli_pruefi.py index 284e3c8c..906d3259 100644 --- a/unittests/test_cli_pruefi.py +++ b/unittests/test_cli_pruefi.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Union -import pytest # type:ignore[import] +import pytest from click.testing import CliRunner, Result from kohlrahbi.ahb.command import ahb diff --git a/unittests/test_conditions_format_file_mapping.py b/unittests/test_conditions_format_file_mapping.py index ac6b7ca6..cb62f7c6 100644 --- a/unittests/test_conditions_format_file_mapping.py +++ b/unittests/test_conditions_format_file_mapping.py @@ -1,4 +1,4 @@ -import pytest # type:ignore[import] +import pytest from maus.edifact import EdifactFormat from kohlrahbi.conditions import find_all_files_from_all_pruefis diff --git a/unittests/test_current_state.py b/unittests/test_current_state.py index 2ec48e26..596a0416 100644 --- a/unittests/test_current_state.py +++ b/unittests/test_current_state.py @@ -8,7 +8,7 @@ from typing import Union import pandas as pd -import pytest # type:ignore[import] +import pytest from click.testing import CliRunner, Result from kohlrahbi import cli diff --git a/unittests/test_docx_extensions.py b/unittests/test_docx_extensions.py index 09e3f8bb..2620ece9 100644 --- a/unittests/test_docx_extensions.py +++ b/unittests/test_docx_extensions.py @@ -2,19 +2,21 @@ tests all the features the kohlrahbi package provides to process Docx files (by using the docx package) """ -import pytest # type:ignore[import] +from typing import Generator + +import pytest from _pytest.fixtures import SubRequest # type:ignore[import] -from docx import Document # type:ignore[import] -from docx.document import Document as DocumentClass # type:ignore[import] -from docx.table import Table # type:ignore[import] -from docx.text.paragraph import Paragraph # type:ignore[import] +from docx import Document +from docx.document import Document as DocumentClass +from docx.table import Table +from docx.text.paragraph import Paragraph from kohlrahbi.read_functions import get_all_paragraphs_and_tables class TestDocxExtensions: @pytest.fixture - def create_docx_from_filename(self, request: SubRequest, datafiles) -> DocumentClass: + def create_docx_from_filename(self, request: SubRequest, datafiles) -> Generator[DocumentClass, None, None]: """a fixture to quickly instantiate a docx.Document from a given docx file name""" docx_file_name = request.param docx_file_path = datafiles / docx_file_name diff --git a/unittests/test_input_checks.py b/unittests/test_input_checks.py index 3fe11de9..1570f7ca 100644 --- a/unittests/test_input_checks.py +++ b/unittests/test_input_checks.py @@ -1,4 +1,4 @@ -import pytest # type:ignore[import] +import pytest from kohlrahbi.ahb import get_valid_pruefis diff --git a/unittests/test_read_functions.py b/unittests/test_read_functions.py index b42d95cb..a5cdf25d 100644 --- a/unittests/test_read_functions.py +++ b/unittests/test_read_functions.py @@ -1,9 +1,8 @@ import json -import os from pathlib import Path -import docx # type:ignore[import] -import pytest # type:ignore[import] +import docx +import pytest from docx import Document from maus.edifact import EdifactFormat, EdifactFormatVersion diff --git a/unittests/test_table_header.py b/unittests/test_table_header.py index 5ed059d7..b8640d6a 100644 --- a/unittests/test_table_header.py +++ b/unittests/test_table_header.py @@ -2,7 +2,7 @@ Test the table_header module. """ -import pytest # type:ignore[import] +import pytest from kohlrahbi.table_header import create_mapping_of_tabstop_positions