Skip to content

Commit

Permalink
Merge pull request #21 from webinterpret-ds/DS-3238_add_compound_enti…
Browse files Browse the repository at this point in the history
…ty_support

Add compound entity support
  • Loading branch information
piotrcichacki authored Jun 3, 2024
2 parents 5d4ecf9 + e11bfb1 commit 2627f73
Show file tree
Hide file tree
Showing 21 changed files with 862 additions and 200 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = data-snack
version = 1.0.4
version = 1.0.5
author = webinterpret-datascience
author_email = data-science@webinterpret.com
description =
Expand Down
3 changes: 2 additions & 1 deletion src/data_snack/entities/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .entity import Entity
from .base import Entity
from .compound import CompoundEntity
from .registry import EntityRegistry
File renamed without changes.
60 changes: 60 additions & 0 deletions src/data_snack/entities/compound.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from abc import ABC
from collections import ChainMap
from dataclasses import dataclass
from itertools import chain
from typing import List, Any, Type

from data_snack.entities import Entity
from data_snack.entities.entity_meta import CompoundEntityMetaClass
from data_snack.entities.models import SourceEntity
from data_snack.entities.utils import map_values, filter_missing_values


@dataclass
class CompoundEntity(ABC, metaclass=CompoundEntityMetaClass):
    """
    Base class for entities assembled from fields of several source entities.

    Subclasses list their sources in ``Meta.sources``. Each source carries a
    mapping from source-entity field names to the names used on the compound
    entity; source fields without a mapping are not part of the compound entity.
    """

    def __init__(self, *args: Any, **kwargs: Any):
        ...

    class Meta:
        sources: List[SourceEntity] = []

    @classmethod
    def get_all_fields(cls) -> List[str]:
        """
        Gets all CompoundEntity fields.
        :return: mapped names of every source entity field, unmapped fields dropped
        """
        return filter_missing_values(list(chain.from_iterable(
            map_values(source.source_fields_mapping, source.entity.get_all_fields())
            for source in cls.Meta.sources
        )))

    @classmethod
    def get_fields(cls) -> List[str]:
        """
        Gets CompoundEntity fields if not excluded.
        :return: mapped names of the source entities' ``get_fields()`` results
        """
        return filter_missing_values(list(chain.from_iterable(
            map_values(source.source_fields_mapping, source.entity.get_fields())
            for source in cls.Meta.sources
        )))

    @classmethod
    def get_excluded_fields(cls) -> List[str]:
        """
        Gets CompoundEntity excluded fields only.
        :return: mapped names of the source entities' ``get_excluded_fields()`` results
        """
        return filter_missing_values(list(chain.from_iterable(
            map_values(source.source_fields_mapping, source.entity.get_excluded_fields())
            for source in cls.Meta.sources
        )))

    @classmethod
    def get_keys(cls) -> List[str]:
        """
        Gets CompoundEntity keys only.
        :return: mapped names of the source entities' ``get_keys()`` results
        """
        return filter_missing_values(list(chain.from_iterable(
            map_values(source.source_fields_mapping, source.entity.get_keys())
            for source in cls.Meta.sources
        )))

    @classmethod
    def create_from_source_entities(cls, entities: List[Entity]) -> "CompoundEntity":
        """
        Creates CompoundEntity from source entities.

        Attributes of a source entity that have no entry in the source's
        mapping are skipped, consistently with the field getters above, instead
        of failing with ``KeyError``.

        :param entities: source entity instances; each one is matched against
            ``Meta.sources`` with ``isinstance``
        :return: instance of ``cls`` built from the mapped values
        """
        # ChainMap resolves duplicate names in favour of the earliest mapping,
        # so earlier sources/entities win when two of them map to the same field.
        return cls(**dict(ChainMap(*[
            {
                source.source_fields_mapping[field]: value
                for field, value in vars(entity).items()
                if field in source.source_fields_mapping
            }
            for source in cls.Meta.sources
            for entity in entities
            if isinstance(entity, source.entity)
        ])))
49 changes: 33 additions & 16 deletions src/data_snack/entities/entity_meta.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,44 @@
from abc import ABC, ABCMeta

from .exceptions import (MetaEmptyKeysException, MetaFieldsException,
NonExistingMetaError)
from data_snack.entities.validation import (
validate_meta_class,
validate_meta_fields,
validate_meta_keys,
validate_meta_sources,
validate_meta_sources_fields,
validate_meta_sources_keys,
)


class EntityMetaClass(ABCMeta):
class MetaClass(ABCMeta):
    """Base metaclass that runs ``validate_meta_class`` on every class it creates."""

    def __new__(mcs, name, bases, dct):
        """Build the class through ``ABCMeta`` and validate it before returning."""
        created_class = super().__new__(mcs, name, bases, dct)
        validate_meta_class(created_class)
        return created_class


class EntityMetaClass(MetaClass):
    """
    Metaclass for entities.

    The presence of the ``Meta`` class is checked by ``MetaClass.__new__``;
    this metaclass additionally validates the ``Meta`` contents through the
    shared validators instead of duplicating those checks inline.
    """

    # Attributes every concrete entity must define on its ``Meta`` class.
    meta_fields = ["keys", "excluded_fields", "version"]

    def __new__(mcs, name, bases, dct):
        """
        Creates the entity class and validates its ``Meta`` definition.
        :param name: name of the class being created
        :param bases: base classes of the class being created
        :param dct: namespace of the class being created
        :return: the created class
        """
        entity_class = super().__new__(mcs, name, bases, dct)
        # Classes deriving directly (and only) from ABC are exempt from the
        # field and key checks.
        if bases != (ABC,):
            validate_meta_fields(entity_class, mcs.meta_fields)
            validate_meta_keys(entity_class)
        return entity_class


class CompoundEntityMetaClass(MetaClass):
    """Metaclass for compound entities; validates the ``Meta.sources`` definition."""

    # Attributes every concrete compound entity must define on its ``Meta`` class.
    meta_fields = ["sources"]

    def __new__(mcs, name, bases, dct):
        """Create the class and, unless its only base is ABC, validate its sources."""
        compound_class = super().__new__(mcs, name, bases, dct)
        if bases == (ABC,):
            # Classes deriving directly (and only) from ABC skip source validation.
            return compound_class
        validate_meta_fields(compound_class, mcs.meta_fields)
        validate_meta_sources(compound_class)
        validate_meta_sources_keys(compound_class)
        validate_meta_sources_fields(compound_class)
        return compound_class
4 changes: 4 additions & 0 deletions src/data_snack/entities/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ class MetaFieldsException(Exception):

class MetaEmptyKeysException(Exception):
    """Raised when a required ``Meta`` collection (e.g. ``keys``) is empty."""


class SourceEntityFieldException(Exception):
    """Raised when source entity keys or fields do not match the declared mappings."""
28 changes: 28 additions & 0 deletions src/data_snack/entities/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from dataclasses import dataclass
from typing import List, Dict

from data_snack.entities.base import Entity


@dataclass
class EntityFieldMapping:
    """Pairs one field name with the source field name it corresponds to."""

    # Name on one side of the mapping (key of ``SourceEntity.fields_mapping``).
    field: str
    # Name on the other side (key of ``SourceEntity.source_fields_mapping``).
    source_field: str


@dataclass
class SourceEntity:
    """Describes one source entity class together with its field mappings."""

    entity: type(Entity)
    entity_fields_mapping: List[EntityFieldMapping]

    def __post_init__(self):
        """Precompute both lookup directions from ``entity_fields_mapping``."""
        by_field = {}
        for mapping in self.entity_fields_mapping:
            by_field[mapping.field] = mapping.source_field
        # The reverse table is derived from the already de-duplicated forward
        # table, so a repeated ``field`` contributes only its last mapping.
        by_source_field = {}
        for field, source_field in by_field.items():
            by_source_field[source_field] = field
        self._fields_mapping = by_field
        self._source_fields_mapping = by_source_field

    @property
    def fields_mapping(self) -> Dict[str, str]:
        """Mapping ``field -> source_field``."""
        return self._fields_mapping

    @property
    def source_fields_mapping(self) -> Dict[str, str]:
        """Mapping ``source_field -> field``."""
        return self._source_fields_mapping
2 changes: 1 addition & 1 deletion src/data_snack/entities/registry.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, Type

from data_snack.entities import Entity
from data_snack.entities.base import Entity
from data_snack.serializers import Serializer

EntityRegistry = Dict[Type[Entity], Serializer]
Expand Down
2 changes: 1 addition & 1 deletion src/data_snack/entities/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, Callable, Dict, Type, get_type_hints

from data_snack.entities import Entity
from data_snack.entities.base import Entity

EntitySchemaGetter = Callable[[Type[Entity], bool], Dict[str, Any]]

Expand Down
9 changes: 9 additions & 0 deletions src/data_snack/entities/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from typing import List, Dict


def map_values(mappings: Dict[str, str], values: List[str]) -> List[str]:
    """
    Translates each value through the given mapping.
    :param mappings: lookup table applied to every value
    :param values: values to translate
    :return: translated values in input order; a value without a mapping yields None
    """
    return [mappings.get(value) for value in values]


def filter_missing_values(values: List[str]) -> List[str]:
    """
    Drops falsy entries (e.g. None or empty strings) from the list.
    :param values: values to filter
    :return: truthy values in their original order
    """
    return [value for value in values if value]
89 changes: 89 additions & 0 deletions src/data_snack/entities/validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from abc import ABCMeta
from itertools import chain
from typing import List

from data_snack.entities.exceptions import (
NonExistingMetaError,
MetaFieldsException,
MetaEmptyKeysException,
SourceEntityFieldException,
)


def validate_meta_class(entity_class: ABCMeta) -> None:
    """
    Validates if entity contains 'Meta' private class.
    :param entity_class: entity to validate
    :raises NonExistingMetaError: if the entity has no 'Meta' attribute
    """
    # hasattr avoids raising from inside an ``except`` block, which would attach
    # the internal AttributeError as implicit context to the reported error.
    if hasattr(entity_class, "Meta"):
        return
    raise NonExistingMetaError(
        f"Private class `Meta not defined for {entity_class.__name__}."
    )


def validate_meta_fields(entity_class: ABCMeta, meta_fields: List[str]) -> None:
    """
    Validates if entity 'Meta' private class has all fields defined.
    :param entity_class: entity to validate
    :param meta_fields: 'Meta' class fields list
    :raises MetaFieldsException: if any of the fields is missing; the message lists them
    """
    missing_fields = [
        field for field in meta_fields if not hasattr(entity_class.Meta, field)
    ]
    if not missing_fields:
        return
    raise MetaFieldsException(f"Missing Meta fields: {missing_fields}.")


def validate_meta_keys(entity_class: ABCMeta) -> None:
    """
    Validates if 'keys' field of entity 'Meta' private class is not empty.
    :param entity_class: entity to validate
    :raises MetaEmptyKeysException: if 'Meta.keys' is empty
    """
    keys = entity_class.Meta.keys
    if keys:
        return
    raise MetaEmptyKeysException("Meta keys can not be empty.")


def validate_meta_sources(entity_class: ABCMeta) -> None:
    """
    Validates if 'sources' field of entity 'Meta' private class is not empty.
    :param entity_class: entity to validate
    :raises MetaEmptyKeysException: if 'Meta.sources' is empty
    """
    sources = entity_class.Meta.sources
    if sources:
        return
    raise MetaEmptyKeysException("Meta sources can not be empty.")


def validate_meta_sources_keys(entity_class: ABCMeta) -> None:
    """
    Validates if all source entities keys are defined in entity.
    :param entity_class: entity to validate
    :raises SourceEntityFieldException: if a key of any source entity has no
        entry in that source's 'source_fields_mapping'
    """
    missing_keys = []
    for source in entity_class.Meta.sources:
        for key in source.entity.get_keys():
            if key not in source.source_fields_mapping:
                # Reported as "<SourceEntityName>.<key>".
                missing_keys.append(f"{source.entity.__name__}.{key}")
    if missing_keys:
        raise SourceEntityFieldException(f"Missing source entity keys: {missing_keys}.")


def validate_meta_sources_fields(entity_class: ABCMeta) -> None:
    """
    Validates if every source field referenced in the mappings exists on its source entity.
    :param entity_class: entity to validate
    :raises SourceEntityFieldException: if 'source_fields_mapping' of any source
        refers to a field that the source entity's 'get_all_fields()' does not return
    """
    missing_mappings = []
    for source in entity_class.Meta.sources:
        # Resolve the source entity's fields once per source rather than once
        # per mapped field.
        available_fields = set(source.entity.get_all_fields())
        missing_mappings.extend(
            f"{source.entity.__name__}.{field}"
            for field in source.source_fields_mapping
            if field not in available_fields
        )
    if missing_mappings:
        raise SourceEntityFieldException(f"Missing source entity fields: {missing_mappings}.")
Loading

0 comments on commit 2627f73

Please sign in to comment.