Merge branch 'main' into hf-kklein-patch-1

Hochfrequenz · May 17, 2024 · 92332ee
2 parents 25adea0 + 3dbc4e7
commit 92332ee
Show file tree

Hide file tree

Showing 44 changed files with 157 additions and 133 deletions.
diff --git a/dev_requirements/requirements-linting.txt b/dev_requirements/requirements-linting.txt
@@ -5,7 +5,7 @@
 #
 #    pip-compile-multi
 #
-astroid==3.1.0
+astroid==3.2.1
     # via pylint
 dill==0.3.8
     # via pylint
@@ -15,7 +15,7 @@ mccabe==0.7.0
     # via pylint
 platformdirs==4.2.0
     # via pylint
-pylint==3.1.1
+pylint==3.2.0
     # via -r dev_requirements/requirements-linting.in
 tomlkit==0.12.5
     # via pylint
diff --git a/requirements.txt b/requirements.txt
@@ -40,7 +40,7 @@ pydantic-core==2.18.2
     # via pydantic
 python-dateutil==2.9.0.post0
     # via pandas
-python-docx==1.1.0
+python-docx==1.1.2
     # via kohlrahbi (pyproject.toml)
 pytz==2024.1
     # via

diff --git a/src/kohlrahbi/__init__.py b/src/kohlrahbi/__init__.py
@@ -12,7 +12,7 @@
 
 @click.group()
 @click.version_option(version=version)
-def cli():
+def cli() -> None:
     """Kohlrahbi CLI tool"""
 
 

diff --git a/src/kohlrahbi/ahb/__init__.py b/src/kohlrahbi/ahb/__init__.py
@@ -9,10 +9,10 @@
 from typing import Any, Dict, Optional
 
 import click
-import docx  # type: ignore
+import docx
 import tomlkit
-from docx.document import Document  # type:ignore[import]
-from docx.table import Table  # type:ignore[import]
+from docx.document import Document
+from docx.table import Table
 from maus.edifact import EdifactFormatVersion
 
 from kohlrahbi.ahbtable.ahbtable import AhbTable
@@ -48,7 +48,7 @@ def process_ahb_table(
     pruefi: str,
     output_path: Path,
     file_type: str,
-):
+) -> None:
     """
     Process the ahb table.
     """
@@ -98,7 +98,7 @@ def process_pruefi(
     path_to_ahb_docx_file: Path,
     output_path: Path,
     file_type: str,
-):
+) -> None:
     """
     Process one pruefi.
     If the input path ends with .docx, we assume that the file containing the pruefi is given.
@@ -183,7 +183,7 @@ def extract_pruefis_from_table(table: Table) -> list[str]:
 
 def table_header_contains_text_pruefidentifikator(table: Table) -> bool:
     """Checks if the table header contains the text 'Prüfidentifikator'."""
-    return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator")
+    return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator")  # type:ignore[no-any-return]
 
 
 def get_pruefi_to_file_mapping(basic_input_path: Path, format_version: EdifactFormatVersion) -> dict[str, str]:

diff --git a/src/kohlrahbi/ahb/command.py b/src/kohlrahbi/ahb/command.py
@@ -12,7 +12,7 @@
 from kohlrahbi.enums.ahbexportfileformat import AhbExportFileFormat
 
 
-def check_python_version():
+def check_python_version() -> None:
     """
     Check if the Python interpreter is greater or equal to 3.11
     """
@@ -24,7 +24,7 @@ def check_python_version():
 
 
 # pylint: disable=unused-argument
-def validate_path(ctx, param, value):
+def validate_path(ctx, param, value) -> Path:  # type:ignore[no-untyped-def]
     """
     Ensure the path exists or offer to create it.
     """
@@ -97,7 +97,7 @@ def ahb(
     format_version: EdifactFormatVersion | str,
     assume_yes: bool,  # pylint: disable=unused-argument
     # it is used by the callback function of the output-path
-):
+) -> None:
     """
     Scrape AHB documents for pruefidentifikatoren.
     This is a command line interface for the pruefis module.

diff --git a/src/kohlrahbi/ahbtable/ahbcondtions.py b/src/kohlrahbi/ahbtable/ahbcondtions.py
@@ -4,7 +4,7 @@
 import re
 from pathlib import Path
 
-from docx.table import Table as DocxTable  # type: ignore[import-untyped]
+from docx.table import Table as DocxTable
 from maus.edifact import EdifactFormat
 from pydantic import BaseModel, ConfigDict
 
@@ -53,10 +53,11 @@ def collect_conditions(
         logger.info("The package conditions for %s were collected.", edifact_format)
         return conditions_dict
 
-    def include_condition_dict(self, to_add=dict[EdifactFormat, dict[str, str]] | None) -> None:
+    def include_condition_dict(self, to_add: dict[EdifactFormat, dict[str, str]] | None) -> None:
         """ " Include a dict of conditions to the conditions_dict"""
         if to_add is None:
             logger.info("Conditions dict to be added is empty.")
+            return
         for edifact_format, edi_cond_dict in to_add.items():
             for condition_key, condition_text in edi_cond_dict.items():
                 if edifact_format in self.conditions_dict:
@@ -117,8 +118,7 @@ def parse_conditions_from_string(
         # check whether condition was already collected:
         existing_text = conditions_dict[edifact_format].get(match[0])
         is_condition_key_collected_yet = existing_text is not None
-        if is_condition_key_collected_yet and existing_text is not None:
-            key_exits_but_shorter_text = len(text) > len(existing_text)
+        key_exits_but_shorter_text = existing_text is not None and len(text) > len(existing_text)
         if not is_condition_key_collected_yet or key_exits_but_shorter_text:
             conditions_dict[edifact_format][match[0]] = text
     return conditions_dict
diff --git a/src/kohlrahbi/ahbtable/ahbpackagetable.py b/src/kohlrahbi/ahbtable/ahbpackagetable.py
@@ -7,7 +7,7 @@ class which contains AHB package condition table
 from pathlib import Path
 
 import pandas as pd
-from docx.table import Table as DocxTable  # type: ignore[import-untyped]
+from docx.table import Table as DocxTable
 from maus.edifact import EdifactFormat
 from pydantic import BaseModel, ConfigDict
 
@@ -52,7 +52,7 @@ def provide_conditions(self, edifact_format: EdifactFormat) -> dict[EdifactForma
         logger.info("The package conditions for %s were collected.", edifact_format)
         return conditions_dict
 
-    def provide_packages(self, edifact_format: EdifactFormat):
+    def provide_packages(self, edifact_format: EdifactFormat) -> None:
         """collect conditions from package table and store them in conditions dict."""
         package_dict: dict[EdifactFormat, dict[str, str]] = {edifact_format: {}}
 
@@ -71,20 +71,20 @@ def provide_packages(self, edifact_format: EdifactFormat):
                     # check whether package was already collected:
                     existing_text = package_dict[edifact_format].get(package)
                     is_package_key_collected_yet = existing_text is not None
-                    if is_package_key_collected_yet:
-                        key_exits_but_shorter_text = len(package_conditions) > len(
-                            existing_text  # type: ignore[arg-type]
-                        )  # type: ignore[arg-type]
+                    key_exits_but_shorter_text = existing_text is not None and len(package_conditions) > len(
+                        existing_text
+                    )
                     if not is_package_key_collected_yet or key_exits_but_shorter_text:
                         package_dict[edifact_format][package] = package_conditions
 
         logger.info("Packages for %s were collected.", edifact_format)
         self.package_dict = package_dict
 
-    def include_package_dict(self, to_add=dict[EdifactFormat, dict[str, str]] | None) -> None:
+    def include_package_dict(self, to_add: dict[EdifactFormat, dict[str, str]] | None) -> None:
         """Include a dict of conditions to the conditions_dict"""
         if to_add is None:
             logger.info("Packages dict to be added is empty.")
+            return
         for edifact_format, edi_cond_dict in to_add.items():
             for package_key, package_conditions in edi_cond_dict.items():
                 if edifact_format in self.package_dict:

diff --git a/src/kohlrahbi/ahbtable/ahbsubtable.py b/src/kohlrahbi/ahbtable/ahbsubtable.py
@@ -5,12 +5,13 @@
 from typing import Generator
 
 import pandas as pd
-from docx.table import Table as DocxTable  # type:ignore[import]
-from docx.table import _Cell  # type:ignore[import]
+from docx.table import Table as DocxTable
+from docx.table import _Cell, _Row
 from pydantic import BaseModel, ConfigDict
 
 from kohlrahbi.ahbtable.ahbtablerow import AhbTableRow
-from kohlrahbi.row_type_checker import RowType, get_row_type
+from kohlrahbi.enums import RowType
+from kohlrahbi.row_type_checker import get_row_type
 from kohlrahbi.seed import Seed
 
 
@@ -122,7 +123,7 @@ def from_headless_table(cls, tmd: Seed, docx_table: DocxTable) -> "AhbSubTable":
         return cls(table_meta_data=tmd, table=ahb_table_dataframe)
 
     @staticmethod
-    def iter_visible_cells(row) -> Generator[_Cell, None, None]:
+    def iter_visible_cells(row: _Row) -> Generator[_Cell, None, None]:
         """
         This function makes sure that you will iterate over the cells you see in the word document.
         For more information go to https://github.com/python-openxml/python-docx/issues/970#issuecomment-877386927

diff --git a/src/kohlrahbi/ahbtable/ahbtable.py b/src/kohlrahbi/ahbtable/ahbtable.py
@@ -91,7 +91,7 @@ def append_ahb_sub_table(self, ahb_sub_table: AhbSubTable) -> None:
             self.table = pd.concat([self.table, ahb_sub_table.table], ignore_index=True)
 
     @staticmethod
-    def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool:
+    def line_contains_only_segment_gruppe(raw_line: pd.Series) -> bool:  # type:ignore[type-arg]
         """
         Returns true if the given raw line only contains some meaningful data in the "Segment Gruppe" key
         """
@@ -112,6 +112,7 @@ def sanitize(self) -> None:
         iterable_ahb_table = peekable(self.table.iterrows())
         self.table.reset_index(drop=True, inplace=True)
         for _, row in iterable_ahb_table:
+            # pylint: disable=unpacking-non-sequence # it is a tuple indeed
             index_of_next_row, next_row = iterable_ahb_table.peek(
                 (
                     0,

diff --git a/src/kohlrahbi/ahbtable/ahbtablerow.py b/src/kohlrahbi/ahbtable/ahbtablerow.py
@@ -5,11 +5,11 @@
 from typing import Optional
 
 import pandas as pd
-from docx.table import _Cell  # type:ignore[import]
+from docx.table import _Cell
 from pydantic import BaseModel, ConfigDict
 
 from kohlrahbi.docxtablecells import BedingungCell, BodyCell, EdifactStrukturCell
-from kohlrahbi.row_type_checker import RowType
+from kohlrahbi.enums import RowType
 from kohlrahbi.seed import Seed
 
 
@@ -46,7 +46,9 @@ def parse(
             dtype="str",
         )
         # pylint: disable=unsubscriptable-object, no-member
-        empty_row: pd.Series[str] = pd.Series(len(ahb_row_dataframe.columns) * [""], index=self.seed.column_headers)
+        empty_row: pd.Series = pd.Series(  # type:ignore[type-arg]
+            len(ahb_row_dataframe.columns) * [""], index=self.seed.column_headers
+        )
 
         ahb_row_dataframe = pd.concat([ahb_row_dataframe, empty_row.to_frame().T], ignore_index=True)
 

diff --git a/src/kohlrahbi/changehistory/__init__.py b/src/kohlrahbi/changehistory/__init__.py
@@ -13,10 +13,10 @@
 from pathlib import Path
 from typing import Optional
 
-import docx  # type: ignore
+import docx
 import pandas as pd
-from docx.document import Document  # type:ignore[import]
-from docx.table import Table  # type: ignore
+from docx.document import Document
+from docx.table import Table
 
 from kohlrahbi.changehistory.changehistorytable import ChangeHistoryTable
 from kohlrahbi.docxfilefinder import DocxFileFinder

diff --git a/src/kohlrahbi/changehistory/changehistorytable.py b/src/kohlrahbi/changehistory/changehistorytable.py
@@ -3,7 +3,7 @@
 """
 
 import pandas as pd
-from docx.table import Table  # type:ignore[import]
+from docx.table import Table
 from pydantic import BaseModel, ConfigDict
 
 from kohlrahbi.ahbtable.ahbsubtable import AhbSubTable
@@ -54,7 +54,7 @@ def is_empty(val: str) -> bool:
             return pd.isna(val) or val == ""
 
         # Define a function to check if a value is considered empty for our case
-        def is_the_first_column_empty(row: pd.Series) -> bool:
+        def is_the_first_column_empty(row: pd.Series) -> bool:  # type:ignore[type-arg]
             """
             Checks if the first column of the given row is empty.
             This is our indicator if the current row is a continuation of the upper row.

diff --git a/src/kohlrahbi/changehistory/command.py b/src/kohlrahbi/changehistory/command.py
@@ -47,7 +47,7 @@ def changehistory(
     format_version: EdifactFormatVersion | str,
     assume_yes: bool,  # pylint: disable=unused-argument
     # it is used by the callback function of the output-path
-):
+) -> None:
     """
     Scrape change histories from the input path and save them to the output path.
 

diff --git a/src/kohlrahbi/conditions/__init__.py b/src/kohlrahbi/conditions/__init__.py
@@ -4,7 +4,7 @@
 
 from pathlib import Path
 
-import docx  # type: ignore[import-untyped]
+import docx
 from maus.edifact import EdifactFormat, EdifactFormatVersion, get_format_of_pruefidentifikator
 
 from kohlrahbi.ahb import get_pruefi_to_file_mapping
@@ -45,8 +45,8 @@ def scrape_conditions(
     for edifact_format, files in all_format_files.items():
         for file in files:
             # pylint: disable=too-many-function-args
-            # type: ignore[call-arg, arg-type]
-            doc = docx.Document(basic_input_path / path_to_file / Path(file))
+            path: Path = basic_input_path / path_to_file / Path(file)
+            doc = docx.Document(str(path.absolute()))
             logger.info("Start scraping conditions for %s in %s", edifact_format, file)
             if not doc:
                 logger.error("Could not open file %s as docx", Path(file))

diff --git a/src/kohlrahbi/conditions/command.py b/src/kohlrahbi/conditions/command.py
@@ -28,7 +28,7 @@
 from kohlrahbi.conditions import scrape_conditions
 
 
-def check_python_version():
+def check_python_version() -> None:
     """
     Check if the Python interpreter is greater or equal to 3.11
     """
@@ -40,7 +40,7 @@ def check_python_version():
 
 
 # pylint: disable=unused-argument
-def validate_path(ctx, param, value):
+def validate_path(ctx, param, value) -> Path:  # type:ignore[no-untyped-def]
     """
     Ensure the path exists or offer to create it.
     """
@@ -91,7 +91,7 @@ def validate_path(ctx, param, value):
 )
 def conditions(
     edi_energy_mirror_path: Path, output_path: Path, format_version: EdifactFormatVersion | str, assume_yes: bool
-):
+) -> None:
     """
     Scrape AHB documents for conditions.
     """

diff --git a/src/kohlrahbi/docxfilefinder.py b/src/kohlrahbi/docxfilefinder.py
@@ -138,7 +138,7 @@ def filter_docx_files_for_edifact_format(self, edifact_format: EdifactFormat) ->
 
         self.paths_to_docx_files = [path for path in self.paths_to_docx_files if str(edifact_format) in path.name]
 
-    def remove_temporary_files(self):
+    def remove_temporary_files(self) -> None:
         """
         This method removes all temporary files from paths_to_docx_files.
         Temporary files lead to the exception `BadZipFile: File is not a zip file`.

diff --git a/src/kohlrahbi/docxtablecells/__init__.py b/src/kohlrahbi/docxtablecells/__init__.py
@@ -5,3 +5,5 @@
 from .bedinungscell import BedingungCell
 from .bodycell import BodyCell
 from .edifactstrukturcell import EdifactStrukturCell
+
+__all__ = ["BedingungCell", "BodyCell", "EdifactStrukturCell"]
diff --git a/src/kohlrahbi/docxtablecells/bedinungscell.py b/src/kohlrahbi/docxtablecells/bedinungscell.py
@@ -5,7 +5,7 @@
 import re
 
 import pandas as pd
-from docx.table import _Cell  # type:ignore[import]
+from docx.table import _Cell
 from pydantic import BaseModel, ConfigDict
 
 

diff --git a/src/kohlrahbi/docxtablecells/bodycell.py b/src/kohlrahbi/docxtablecells/bodycell.py
@@ -3,7 +3,8 @@
 """
 
 import pandas as pd
-from docx.table import _Cell  # type:ignore[import]
+from docx.table import _Cell
+from docx.text.paragraph import Paragraph
 from maus.reader.flat_ahb_reader import FlatAhbCsvReader
 from pydantic import BaseModel, ConfigDict
 
@@ -103,9 +104,9 @@ def parse(self, ahb_row_dataframe: pd.DataFrame) -> pd.DataFrame:
 
         return ahb_row_dataframe
 
-    def has_paragraph_tabstops(self, paragraph) -> bool:
+    def has_paragraph_tabstops(self, paragraph: Paragraph) -> bool:
         """
         Checks if the given paragraph contains tabstops
         """
         tab_stops = list(paragraph.paragraph_format.tab_stops)
-        return len(tab_stops) > 0
+        return any(tab_stops)
diff --git a/src/kohlrahbi/docxtablecells/edifactstrukturcell.py b/src/kohlrahbi/docxtablecells/edifactstrukturcell.py
@@ -5,7 +5,7 @@
 import re
 
 import pandas as pd
-from docx.table import _Cell  # type:ignore[import]
+from docx.table import _Cell
 from pydantic import BaseModel, ConfigDict
 
 _segment_group_pattern = re.compile(r"^SG\d+$")

diff --git a/src/kohlrahbi/enums/__init__.py b/src/kohlrahbi/enums/__init__.py
@@ -4,3 +4,5 @@
 
 from .row_type import RowType
 from .row_type_color import RowTypeColor
+
+__all__ = ["RowType", "RowTypeColor"]