Skip to content

Commit

Permalink
Core code
Browse files Browse the repository at this point in the history
  • Loading branch information
mephenor committed Oct 14, 2024
1 parent 4f5913b commit ee8240c
Show file tree
Hide file tree
Showing 17 changed files with 292 additions and 102 deletions.
4 changes: 2 additions & 2 deletions src/metldata/builtin_transformations/common/path/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ def __init__(self, *, path_str: str):
"""Construct relation path from a string-based representation."""
self.path_str = clean_path_str(path_str=path_str)
self.elements = path_str_to_object_elements(path_str=self.path_str)
self.source = self.elements[0].source
self.target = self.elements[-1].target
self.source = self.elements[0].lhs
self.target = self.elements[-1].rhs

@classmethod
def validate(cls, value, info: ValidationInfo) -> "RelationPath":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ class RelationPathElement(BaseModel):
+ " RelationPathElementType enum."
),
)
source: str = Field(
lhs: str = Field(
..., description="The name of the source class that is referencing."
)
target: str = Field(
rhs: str = Field(
..., description="The name of the target class that is referenced."
)
property: str = Field(
Expand Down
12 changes: 6 additions & 6 deletions src/metldata/builtin_transformations/common/path/path_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def split_first_element(*, path_str: str) -> tuple[str, str | None]:


def get_string_elements(*, path_str: str) -> list[str]:
"""Decomposes a path string into elements in string repesentation. The path_str is
"""Decomposes a path string into elements in string representation. The path_str is
assumed to be cleaned.
"""
elements: list[str] = []
Expand Down Expand Up @@ -161,21 +161,21 @@ def get_element_components(*, string_element: str) -> tuple[str, str, str]:
string_element_cleaned = string_element.replace(">", "").replace("<", "")

# extract the source:
source, slot_and_target = string_element_cleaned.split("(")
lhs, slot_and_target = string_element_cleaned.split("(")

# extract slot and target:
slot, target = slot_and_target.split(")")
slot, rhs = slot_and_target.split(")")

return source, slot, target
return lhs, slot, rhs


def string_element_to_object(string_element: str) -> RelationPathElement:
    """Translate a string-based path element into an object-based representation.

    The string element is first passed to `validate_string_element`. Its type and
    its three components (lhs, slot and rhs, as returned by
    `get_element_components`) are then combined into a `RelationPathElement`,
    with the slot stored as the element's `property`.
    """
    validate_string_element(string_element)
    type_ = get_element_type(string_element=string_element)

    # Only the lhs/rhs naming is used here; the former source/target keywords
    # were left over from before the rename and made the code below unreachable.
    lhs, slot, rhs = get_element_components(string_element=string_element)

    return RelationPathElement(type_=type_, lhs=lhs, property=slot, rhs=rhs)


def path_str_to_object_elements(path_str: str) -> list[RelationPathElement]:
Expand Down
98 changes: 97 additions & 1 deletion src/metldata/builtin_transformations/copy_content/assumptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,17 @@

from schemapack.spec.schemapack import SchemaPack

from metldata.builtin_transformations.add_content_properties.path import (
resolve_schema_object_path,
)
from metldata.builtin_transformations.copy_content.instruction import (
CopyContentInstruction,
)
from metldata.builtin_transformations.copy_content.path import (
RelationPathGraph,
RelationPathNode,
)
from metldata.transform.base import ModelAssumptionError


def check_model_assumptions(
Expand All @@ -29,4 +37,92 @@ def check_model_assumptions(
"""Check the model assumptions for the copy content transformation."""
for _, instructions in instructions_by_class.items():
for instruction in instructions:
...
assert_relation_path_constraints(schema=schema, instruction=instruction)
assert_source_and_property_exist(schema=schema, instruction=instruction)
assert_target_content_exists(schema=schema, instruction=instruction)


def assert_relation_path_constraints(
    *, schema: SchemaPack, instruction: CopyContentInstruction
):
    """Assert that the relation path of an instruction can be turned into a
    RelationPathGraph and that the classes and properties it references exist.

    A ValueError raised while building the graph is re-raised as a
    ModelAssumptionError carrying the same message. Every node the first graph
    node points to is then checked recursively against the schema.
    """
    try:
        graph = RelationPathGraph(instruction.source.relation_path)
    except ValueError as parse_error:
        # surface graph construction problems as violated model assumptions
        raise ModelAssumptionError(str(parse_error)) from parse_error

    # walk the path starting from everything the first node points to
    for successor, property_name in graph.first.points_to.items():
        _check_class_property_existence(
            schema=schema, node=successor, property=property_name
        )


def _check_class_property_existence(
    *, schema: SchemaPack, node: RelationPathNode, property: str
):
    """Recursively check that classes and associated properties along a relation path do exist.

    The class named by `node` has to be present in `schema.classes` and
    `property` has to be listed under "properties" in the content schema of that
    class. The same check is then applied to every node that `node` points to.

    Raises a ModelAssumptionError as soon as a class or a property is missing.
    """
    # check that the class of the current node exists
    node_class = schema.classes.get(node.name)
    if not node_class:
        raise ModelAssumptionError(
            f"No class with name {node.name} present in the model, but specified in relation path."
        )

    # check associated property exists
    node_schema = node_class.content.json_schema_dict
    if property not in node_schema.get("properties", {}):
        raise ModelAssumptionError(
            f"No property with name {property} present for class {node.name} in the model, "
            "but specified in relation path."
        )

    # descend into the remaining path; dedicated loop names keep the
    # `property` argument from being rebound while iterating
    for next_node, next_property in node.points_to.items():
        _check_class_property_existence(
            schema=schema, node=next_node, property=next_property
        )


def assert_source_and_property_exist(
    *, schema: SchemaPack, instruction: CopyContentInstruction
):
    """Assert that the class at the end of the relation path is part of the model
    and that the content path of the instruction can be resolved within the
    content schema of that class.

    Raises a ModelAssumptionError if the class is missing or if resolving the
    content path fails with a KeyError.
    """
    graph = RelationPathGraph(instruction.source.relation_path)
    source_class = graph.last
    source_content_path = instruction.source.content_path

    # the last node of the graph names the class to copy from
    class_definition = schema.classes.get(source_class.name)
    if not class_definition:
        raise ModelAssumptionError(
            f"No class with name {source_class.name} present in the model."
        )

    # only the success of the lookup matters, the resolved schema is discarded
    try:
        resolve_schema_object_path(
            class_definition.content.json_schema_dict, source_content_path
        )
    except KeyError as lookup_error:
        raise ModelAssumptionError(
            f"Could not find content path {source_content_path} within the {source_class.name} class."
        ) from lookup_error


def assert_target_content_exists(
    *, schema: SchemaPack, instruction: CopyContentInstruction
):
    """Assert that the class to modify is part of the model and that its content
    schema does not yet list the property that is going to be added.

    Raises a ModelAssumptionError if the class is missing or the property is
    already present.
    """
    target_property_name = instruction.target_content.property_name

    class_definition = schema.classes.get(instruction.class_name)
    if not class_definition:
        raise ModelAssumptionError(
            f"Target class {instruction.class_name} not present in model."
        )

    # a content schema without a "properties" entry counts as having none
    existing_properties = class_definition.content.json_schema_dict.get(
        "properties", {}
    )
    if target_property_name in existing_properties:
        raise ModelAssumptionError(f"Property {target_property_name} already present.")
4 changes: 1 addition & 3 deletions src/metldata/builtin_transformations/copy_content/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@ def instructions_by_class(
self,
) -> dict[str, list[CopyContentInstruction]]:
"""Returns a dictionary of instructions by class (i.e. config for each class)."""
instructions_by_class: dict[
str, list[CopyContentInstruction]
] = {}
instructions_by_class: dict[str, list[CopyContentInstruction]] = {}
for instruction in self.copy_content:
instructions_by_class.setdefault(instruction.class_name, []).append(
instruction
Expand Down
33 changes: 23 additions & 10 deletions src/metldata/builtin_transformations/copy_content/data_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@

from schemapack.spec.datapack import DataPack

from metldata.builtin_transformations.common.path.path_elements import (
RelationPathElementType,
)
from metldata.builtin_transformations.copy_content.instruction import (
CopyContentInstruction,
)
from metldata.builtin_transformations.copy_content.path import RelationPathGraph
from metldata.transform.base import EvitableTransformationError


Expand Down Expand Up @@ -53,16 +51,31 @@ def copy_content(
for instruction in instructions:
content = target_resource.content
target_property_name = instruction.target_content.property_name
relation_graph = RelationPathGraph(instruction.source.relation_path)

if target_property_name in content:
raise EvitableTransformationError()
source_resource_name = relation_graph.last.name

# fetch property schema to copy
source_resources = modified_data.resources.get(source_resource_name)
if not source_resources:
raise EvitableTransformationError()

if len(source_resources) != 1:
raise ValueError(
"Expected exactly one resource to copy from, but found multiple."
)
source_resource = next(iter(source_resources.values()))

source_content = source_resource.content
for path_elem in instruction.source.content_path.split("."):
source_content = source_content.get(path_elem)
if not source_content:
raise EvitableTransformationError()

# TODO: simplified path handling for now
last_path_element = instruction.source.relation_path.elements[-1]
if last_path_element.type_ == RelationPathElementType.ACTIVE:
source_class_name = last_path_element.target
else:
source_class_name = last_path_element.source
property_name = last_path_element.property
content.setdefault("properties", {})[target_property_name] = (
source_content
)

return modified_data
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ class CopyContentInstruction(BaseSettings):
classes are connected.
"""

class_name: str = Field(...,
description="The name of the class to modify.")
class_name: str = Field(..., description="The name of the class to modify.")

target_content: CopyTarget = Field(
...,
Expand Down
2 changes: 1 addition & 1 deletion src/metldata/builtin_transformations/copy_content/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,5 @@ def transform_model(model: SchemaPack, config: CopyContentConfig) -> SchemaPack:
config_cls=CopyContentConfig,
check_model_assumptions=check_model_assumptions_wrapper,
transform_model=transform_model,
data_transformer_factory=CopyContentTransformer
data_transformer_factory=CopyContentTransformer,
)
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@

from schemapack.spec.schemapack import ClassDefinition, SchemaPack

from metldata.builtin_transformations.common.path.path_elements import (
RelationPathElementType,
from metldata.builtin_transformations.add_content_properties.path import (
resolve_schema_object_path,
)
from metldata.builtin_transformations.copy_content.instruction import (
CopyContentInstruction,
)
from metldata.builtin_transformations.copy_content.path import RelationPathGraph
from metldata.transform.base import EvitableTransformationError


Expand All @@ -47,30 +48,28 @@ def add_copy_content(
for instruction in instructions:
# check for property existence
target_property_name = instruction.target_content.property_name
relation_graph = RelationPathGraph(instruction.source.relation_path)
if target_property_name in content_schema.get("properties", {}):
raise EvitableTransformationError()

# extract schema information that needs to be copied
# TODO: simplified path handling for now
last_path_element = instruction.source.relation_path.elements[-1]
if last_path_element.type_ == RelationPathElementType.ACTIVE:
source_class_name = last_path_element.target
else:
source_class_name = last_path_element.source
property_name = last_path_element.property
source_class = relation_graph.last
source_content_path = instruction.source.content_path

# fetch property schema to copy
source_class_def = model.classes.get(source_class_name)
source_class_def = model.classes.get(source_class.name)
if not source_class_def:
raise EvitableTransformationError()

source_content_schema = source_class_def.content.json_schema_dict
source_properties = source_content_schema.get("properties", {})
if property_name not in source_properties:
raise EvitableTransformationError()
try:
property_schema = resolve_schema_object_path(
source_content_schema, source_content_path
)
except KeyError as exc:
raise EvitableTransformationError() from exc

# add property schema to target class
property_schema = source_properties[property_name]
content_schema.setdefault("properties", {})[target_property_name] = (
deepcopy(property_schema)
)
Expand Down
Loading

0 comments on commit ee8240c

Please sign in to comment.