Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract 5 digit Segment ID from (some) ≥FV2410 AHBs #309

Merged
merged 6 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/kohlrahbi/unfoldedahb/unfoldedahbline.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class UnfoldedAhbLine(BaseModel):
segment_name: str # Ansprechpartner
segment_gruppe: str | None # SG3
segment: str | None # CTA
segment_id: str | None = None # 00009
datenelement: str | None # 3055
code: str | None # IC
qualifier: str | None # Name vom Ansprechpartner
Expand Down
26 changes: 25 additions & 1 deletion src/kohlrahbi/unfoldedahb/unfoldedahbtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import copy
import json
import re
from functools import lru_cache
from pathlib import Path
from uuid import uuid4

Expand All @@ -26,6 +27,7 @@
from kohlrahbi.unfoldedahb.unfoldedahbtablemetadata import UnfoldedAhbTableMetaData

# Matches a segment group key: "SG" followed by one or more digits, e.g. "SG3".
_segment_group_pattern = re.compile(r"^SG\d+$")
# Matches a segment ID: exactly five digits, e.g. "00009".
_segment_id_pattern = re.compile(r"^\d{5}$")


def _lines_are_equal_when_ignoring_guid(line1: AhbLine, line2: AhbLine) -> bool:
Expand All @@ -39,6 +41,24 @@ def _lines_are_equal_when_ignoring_guid(line1: AhbLine, line2: AhbLine) -> bool:
return line1_copy == line2_copy


@lru_cache
def _split_data_element_and_segment_id(value: str | None) -> tuple[str | None, str | None]:
"""
returns the data element id and segment id
"""
if value is None:
return None, None
datenelement_id: str | None
segment_id: str | None
if _segment_id_pattern.match(value):
datenelement_id = None
segment_id = value
else:
datenelement_id = value
segment_id = None
return datenelement_id, segment_id


def _keep_guids_of_unchanged_lines_stable(
updated_ahb: FlatAnwendungshandbuch, existing_ahb: FlatAnwendungshandbuch
) -> None:
Expand Down Expand Up @@ -159,7 +179,8 @@ def from_ahb_table(cls, ahb_table: AhbTable, pruefi: str) -> "UnfoldedAhb":
segment_name=current_section_name,
segment_gruppe=row["Segment Gruppe"] or None,
segment=row["Segment"] or None,
datenelement=row["Datenelement"] or None,
datenelement=_split_data_element_and_segment_id(row["Datenelement"])[0],
segment_id=_split_data_element_and_segment_id(row["Datenelement"])[1],
Comment on lines +182 to +183
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ich gebe zu, das ist mittel hässlich. aber so hat pylint nicht gejammert, es gäbe zu viele locals ;)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair enough ;)

code=value_pool_entry,
qualifier="",
beschreibung=description,
Expand Down Expand Up @@ -284,6 +305,8 @@ def _is_just_segment(ahb_row: pd.Series) -> bool: # type:ignore[type-arg]
and not ahb_row["Datenelement"]
):
return True
if ahb_row["Datenelement"] is not None and _segment_id_pattern.match(ahb_row["Datenelement"]):
return True
return False

@staticmethod
Expand Down Expand Up @@ -325,6 +348,7 @@ def convert_to_flat_ahb(self) -> FlatAnwendungshandbuch:
segment_group_key=unfolded_ahb_line.segment_gruppe,
segment_code=unfolded_ahb_line.segment,
data_element=unfolded_ahb_line.datenelement,
segment_id=unfolded_ahb_line.segment_id,
value_pool_entry=unfolded_ahb_line.code,
name=unfolded_ahb_line.beschreibung or unfolded_ahb_line.qualifier,
ahb_expression=unfolded_ahb_line.bedingung_ausdruck,
Expand Down
Binary file not shown.
51 changes: 50 additions & 1 deletion unittests/test_ahb_sub_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
from pathlib import Path

import docx
import pytest
from docx.table import Table

from kohlrahbi.ahbtable.ahbsubtable import AhbSubTable
from kohlrahbi.read_functions import get_all_paragraphs_and_tables
from kohlrahbi.read_functions import get_ahb_table, get_all_paragraphs_and_tables
from kohlrahbi.unfoldedahb import UnfoldedAhb


class TestAhbSubTable:
Expand Down Expand Up @@ -41,3 +43,50 @@ def test_from_table(self) -> None:
assert isinstance(ahb_sub_table, AhbSubTable)
else:
raise TypeError("You did not pass a docx table instance.")

@pytest.mark.parametrize(
    "docx_path, segment_id, segment_code",
    [
        # All cases read the same AHB document; only the expected (segment ID, segment code) pair differs.
        pytest.param(
            Path(__file__).parent
            / Path(
                # pylint: disable=line-too-long
                "test-files/docx_files/UTILMDAHBStrom-informatorischeLesefassung1.2aKonsolidierteLesefassungmitFehlerkorrekturenStand05.04.2024_99991231_20240405.docx"
            ),
            expected_segment_id,
            expected_segment_code,
        )
        for expected_segment_id, expected_segment_code in (
            ("00003", "UNH"),
            ("00004", "BGM"),
            ("00540", "UNT"),
        )
    ],
)
def test_segment_id_parsing(self, docx_path: Path, segment_id: str, segment_code: str) -> None:
    """
    Check that the flat AHB built from the docx file contains a line which carries
    both the expected segment ID and the expected segment code.

    https://github.com/Hochfrequenz/kohlrahbi/issues/304
    """
    pruefi = "55109"  # the Pruefidentifikator whose table is read from the document
    assert docx_path.exists()
    doc = docx.Document(str(docx_path))  # Creating word reader object.
    ahb_table = get_ahb_table(document=doc, pruefi=pruefi)
    assert ahb_table is not None
    unfolded_ahb = UnfoldedAhb.from_ahb_table(ahb_table=ahb_table, pruefi=pruefi)
    assert unfolded_ahb is not None
    flat_ahb = unfolded_ahb.convert_to_flat_ahb()
    assert flat_ahb is not None
    # Evaluate the predicate itself rather than the truthiness of the line objects.
    assert any(line.segment_id is not None for line in flat_ahb.lines)
    assert any(line.segment_id == segment_id and line.segment_code == segment_code for line in flat_ahb.lines)