diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..5e9a11e --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,31 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Attach using Process Id", + "type": "debugpy", + "request": "attach", + "processId": "${command:pickProcess}", + "justMyCode": false + }, + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "purpose": [ + "debug-test" + ], + "console": "integratedTerminal", + "justMyCode": false, + // If coverage report is generated(see pyproject.toml), then the breakpoints will be ignored and there's not possible to debug step by step. + // So we override the pyproject.toml coverage option here + "env": { + "PYTEST_ADDOPTS": "--no-cov" + }, + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9b38853 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..5793046 --- /dev/null +++ b/docs/index.md @@ -0,0 +1 @@ +# PyDataGenerator Docs diff --git a/docs/reference/reference.md b/docs/reference/reference.md new file mode 100644 index 0000000..c0d1c64 --- /dev/null +++ b/docs/reference/reference.md @@ -0,0 +1,13 @@ +# Reference +## Data +::: src.pydatagenerator.data.abstract.abstract_dataset +::: src.pydatagenerator.data.abstract.abstract_dataset_handler_factory +::: src.pydatagenerator.data.impl.dataset_handler_factory +::: src.pydatagenerator.data.impl.random_number_data_set +::: 
src.pydatagenerator.data.impl.sequence_data_set + + +## Xml +::: src.pydatagenerator.xml.abstract.abstract_xml_parser +::: src.pydatagenerator.xml.impl.xml_parser +::: src.pydatagenerator.xml.impl.xml_parser_util diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..dd865db --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,10 @@ +site_name: Pydatagenerator Docs + +theme: + name: "material" + +nav: +- PyDataGenerator Docs: index.md +- Reference: reference/reference.md +plugins: +- mkdocstrings \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 5514a9b..f6a1744 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src"] +packages = ["src/pydatagenerator"] [project] name = "pydatagenerator" @@ -17,6 +17,7 @@ version = "0.0.0" description = "Coming soon" readme = "README.md" requires-python = ">=3.6" +dependencies = ["lxml>=5.2.1"] classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", @@ -25,6 +26,41 @@ classifiers = [ license = { file = "LICENSE" } keywords = ["pydatagenerator", "data", "data-generator", "template"] +[project.optional-dependencies] +dev = [ + "toml>=0.10.2", + "pylint>=3.1.0", + "yapf>=0.40.2", + "flake8>=7.0.0", + "mkdocs>=1.6.0", + "mkdocstrings[python]>=0.18", + "mkdocs-material>=9.5.0", +] +test = ["pytest>=8.2.0", "pytest-cov>=5.0.0"] + +[tool.pytest.ini_options] +addopts = "--cov --cov-report html --cov-report term-missing --cov-fail-under 95" + +[tool.coverage.run] +source = ["src"] + +[tool.yapf] +blank_line_before_nested_class_or_def = true +column_limit = 88 + + +[tool.pylint] +max-line-length = 88 +disable = [ + "C0103", # (invalid-name) + "C0114", # (missing-module-docstring) + "C0115", # (missing-class-docstring) + "C0116", # (missing-function-docstring) + "R0903", # (too-few-public-methods) + "R0913", # (too-many-arguments) + "W0105", # 
class DataSetConstants:
    """Constants shared by every data set implementation."""

    # Attributes that every dataset element must define, whatever its type.
    DEFAULT_REQUIRED_FIELDS = ['name', 'type']


class AbstractDataSet(abc.ABC):
    """Base contract for data sets.

    A concrete data set declares its `type` identifier and its
    required/optional attribute names, and implements handle() to
    produce one value from the dataset_info mapping.
    """

    # Overridden by each concrete subclass with its own type identifier.
    type = 'type.abstract-dataset'

    def __init__(self, dataset_info: Dict[str, object]):
        """Creates a new data set.

        Args:
            dataset_info (Dict[str, object]): Attribute name/value mapping
                describing the data set.
        """
        self._dataset_info = dataset_info

    @property
    def dataset_info(self) -> Dict[str, object]:
        """Dataset info getter

        Returns:
            Dict[str, object]: The dataset_info value
        """
        return self._dataset_info

    @dataset_info.setter
    def dataset_info(self, value: Dict[str, object]):
        """Dataset info setter

        Args:
            value (Dict[str, object]): The dataset_info value
        """
        self._dataset_info = value

    @dataset_info.deleter
    def dataset_info(self) -> None:
        """Dataset info deleter
        """
        del self._dataset_info

    def _fail(self, message: str) -> None:
        """Write *message* to stderr and abort the process.

        Shared by all validators below, which previously duplicated this
        write/exit pair.
        """
        sys.stderr.write(message)
        sys.exit(-1)

    def validate_default_required_fields(self) -> None:
        """Validates the default required fields (name and type)
        """
        present = set(self._dataset_info)
        for field in DataSetConstants.DEFAULT_REQUIRED_FIELDS:
            if field not in present:
                # ' \n' appended for consistency with the other validators so
                # consecutive error messages do not run together on stderr.
                self._fail(f'Error: Missing required property {field} \n')

    def validate_required_fields(self) -> None:
        """Validates the type-specific required fields
        """
        present = set(self._dataset_info)
        for field in self.required_fields():
            if field not in present:
                self._fail(f'Error: Missing property {field} \n')

    def validate_optional_fields(self) -> None:
        """Validates that every attribute beyond the required ones is a
        declared optional field.
        """
        known_required = set(DataSetConstants.DEFAULT_REQUIRED_FIELDS) | set(self.required_fields())
        extras = {field for field in self._dataset_info if field not in known_required}
        diff = extras - set(self.optional_fields())
        if diff:
            self._fail(f'Error: Unknown properties {diff} \n')

    def validate_dataset_info(self) -> None:
        """Validates the dataset info.

        Type-specific required/optional validation only runs when this
        instance's `type` matches the dataset_info's declared type.
        """
        self.validate_default_required_fields()
        if self.type == self._dataset_info['type']:
            self.validate_required_fields()
            self.validate_optional_fields()

    @abc.abstractmethod
    def required_fields(self) -> List[str]:
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return []

    @abc.abstractmethod
    def optional_fields(self) -> List[str]:
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return []

    @abc.abstractmethod
    def handle(self) -> object:
        """Process the given dataset_info and returns a result out of it

        Returns:
            object: The result obtained after processing the dataset_info
        """
        return
class DatasetHandlerFactory(AbstractDataSetHandler):
    """Dataset handler factory.

    Resolves the concrete AbstractDataSet subclass registered for the
    dataset_info's 'type' attribute and instantiates it.
    """

    def __init__(self):
        # Registry: every direct AbstractDataSet subclass maps its `type`
        # identifier to the class itself.
        # NOTE(review): only subclasses already imported when the factory is
        # constructed are discovered — presumably the impl package imports
        # them all; verify when adding a new data set type.
        self.__classes = {c.type: c for c in AbstractDataSet.__subclasses__()}

    def get_dataset_handler(self, dataset_info: Dict[str, object]) -> AbstractDataSet:
        """Get dataset handler

        Args:
            dataset_info (Dict[str, object]): The dataset info to construct the dataset handler for

        Returns:
            AbstractDataSet: The data set handler constructed from the dataset_info
        """
        # Named `dataset_type` so the `type` builtin is not shadowed.
        dataset_type = dataset_info.get('type')
        if not dataset_type:
            sys.stderr.write('Error: No value provided for type')
            sys.exit(-1)
        handler = self.__classes.get(dataset_type)
        if not handler:
            # Double-quoted f-string: re-using single quotes for the nested
            # key was a SyntaxError on Python < 3.12, and the project
            # declares requires-python >= 3.6.
            sys.stderr.write(f"Error: Unknown type {dataset_type}")
            sys.exit(-1)
        return handler(dataset_info)
class RandomNumberDataSet(AbstractDataSet):
    """RandomNumberDataSet

    Yields a random number between the dataset's 'min' and 'max'
    attributes: a float when the optional 'floating' attribute equals
    'true' (case-insensitive), an int otherwise.
    """

    type = 'type.random-number-dataset'

    def required_fields(self):
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['min', 'max']

    def optional_fields(self):
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return ['floating']

    def handle(self) -> object:
        """Process the given dataset_info and returns a result out of it

        Returns:
            object: The result obtained after processing the dataset_info
        """
        self.validate_dataset_info()
        # 'floating' is optional: use .get() so its absence does not raise
        # KeyError (direct indexing crashed whenever it was omitted, even
        # though validation allows that).
        floating = self._dataset_info.get('floating')
        is_floating = floating is not None and str(floating).lower() == 'true'
        func = random.uniform if is_floating else random.randint
        return func(int(self._dataset_info['min']), int(self._dataset_info['max']))
class SequenceDataSet(AbstractDataSet):
    """SequenceDataSet

    Yields an arithmetic sequence: 'start' on the first handle() call,
    then previous value + 'increment' on each subsequent call.
    """

    type = 'type.sequence-dataset'

    def __init__(self, dataset_info: Dict[str, object]):
        super().__init__(dataset_info)
        # Current value. Initialised lazily in handle() so an invalid
        # dataset_info is reported by validate_dataset_info() rather than
        # raising KeyError/ValueError here, before validation could run.
        self.__val = None

    def required_fields(self) -> List[str]:
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['start', 'increment']

    def optional_fields(self) -> List[str]:
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return []

    def handle(self) -> object:
        """Process the given dataset_info and returns a result out of it

        Returns:
            object: The result obtained after processing the dataset_info
        """
        self.validate_dataset_info()
        if self.__val is None:
            # First call: the sequence starts at 'start'.
            self.__val = int(self._dataset_info['start'])
        else:
            self.__val += int(self._dataset_info['increment'])
        return self.__val
class AbstractXmlParser(abc.ABC):
    """Contract every XML parser implementation must satisfy."""

    @abc.abstractmethod
    def parse_xml_from_string(self, xml_str: str) -> List[str]:
        """Generate data from an XML specification given as a string.

        Args:
            xml_str (str): The xml specification for generating data

        Returns:
            List[str]: List of resulted values after processing the xml_str
        """

    @abc.abstractmethod
    def parse_xml_from_file(self, xml_file: str) -> List[str]:
        """Generate data from an XML specification stored in a file.

        Args:
            xml_file (str): The path to the xml file containing the xml specification for generating data

        Returns:
            List[str]: List of resulted values after processing the xml_str
        """
template_content's variables + iterations (int): The number of iterations + + Returns: + List[str]: List of values after replacing the template_content's variables with the data_info values for each iteration + """ + regex = re.compile(r'(?:^|(?<=[^#]))#{\w+}') + iterators = {k: iter(v) for k, v in data_info.items()} + + def subst(match_obj): + key = match_obj.group(0) + # Remove unnecessary characters from key + for c in ['{', '#', '}']: + key = key.replace(c, '') + return str(next(iterators[key])) + + return [regex.sub(subst, template_content) for _ in range(iterations)] + + def parse_xml_from_string(self, xml_str: str) -> List[str]: + """Parse xml from string + + Args: + xml_str (str): The xml specification for generating data + + Returns: + List[str]: List of resulted values after processing the xml_str + """ + root = etree.fromstring(xml_str) + generator = root.xpath('//pydatagenerator')[0] + datasets = generator.xpath('//dataset') + iterations = generator.get('iterations') + handler_factory = DatasetHandlerFactory() + data_info = {} + + if not iterations: + sys.stderr.write('Error: No iterations property found for pydatagenerator tag') + sys.exit(-1) + else: + iterations = int(iterations) + + for _ in range(iterations): + for dataset in datasets: + dataset_info = XmlParserUtil.collect_attributes(dataset) + dataset_handler = handler_factory.get_dataset_handler(dataset_info) + name = dataset_info['name'] + if name in data_info: + data_info[name].append(dataset_handler.handle()) + else: + data_info[name] = [dataset_handler.handle()] + + templates = generator.xpath('//template') + template = templates[0] if templates else None + + if template is None: + sys.stderr.write('Error: No