Skip to content

Commit

Permalink
Merge pull request #3 from alexprodan99/apd/new-rules
Browse files Browse the repository at this point in the history
add more datasets
  • Loading branch information
alexprodan99 authored May 31, 2024
2 parents 357a9ca + 9b08b93 commit 3f62925
Show file tree
Hide file tree
Showing 22 changed files with 400 additions and 63 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ on:
pull_request:
types: [opened, reopened]


jobs:
build:
name: Build distribution 📦
Expand Down Expand Up @@ -61,6 +60,9 @@ jobs:
generate-executable:
name: >-
Generate package 📦 executable
needs:
- build
- test
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand Down
15 changes: 13 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
# Changelog

## [0.0.2] - 2024-05-31
### Added
- random categorical dataset
- random number timeseries dataset
- sequence categorical dataset
- sequence number timeseries dataset

### Changed
- updated the sequence number dataset to support floating-point numbers
- updated the publish pipeline to run on multiple operating systems
- updated the publish pipeline to build an executable that generates template data

## [0.0.1] - 2024-05-18
### Added
- Initial project setup
- v0.0.1 dataset contract
- v0.0.1 random number dataset
- v0.0.1 sequence dataset



[0.0.2]: https://github.com/alexprodan99/pydatagenerator/compare/v0.0.1...v0.0.2
[0.0.1]: https://github.com/alexprodan99/pydatagenerator/releases/tag/v0.0.1
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ authors = [
maintainers = [
{ name = "Alexandru Prodan", email = "prodanalexandru1999@gmail.com" },
]
version = "0.0.1"
description = "Coming soon"
version = "0.0.2"
description = "Generate template data from xml specification"
readme = "README.md"
requires-python = ">=3.6"
dependencies = ["lxml>=5.2.1"]
Expand All @@ -35,7 +35,7 @@ dev = [
"mkdocs>=1.6.0",
"mkdocstrings[python]>=0.18",
"mkdocs-material>=9.5.0",
"pyinstaller>=6.6.0"
"pyinstaller>=6.6.0",
]
test = ["pytest>=8.2.0", "pytest-cov>=5.0.0"]

Expand Down
4 changes: 2 additions & 2 deletions src/pydatagenerator/data/abstract/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from pydatagenerator.data.abstract.abstract_dataset import AbstractDataSet, DataSetConstants
from pydatagenerator.data.abstract.abstract_dataset_handler_factory import AbstractDataSetHandler
from .abstract_dataset import AbstractDataSet, DataSetConstants
from .abstract_dataset_handler_factory import AbstractDataSetHandler
1 change: 1 addition & 0 deletions src/pydatagenerator/data/abstract/abstract_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(self, dataset_info: Dict[str, object]):
"""Creates a new data set
"""
self._dataset_info = dataset_info
self.validate_dataset_info()

@property
def dataset_info(self) -> Dict[str, object]:
Expand Down
10 changes: 7 additions & 3 deletions src/pydatagenerator/data/impl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from pydatagenerator.data.impl.random_number_data_set import RandomNumberDataSet
from pydatagenerator.data.impl.sequence_data_set import SequenceDataSet
from pydatagenerator.data.impl.dataset_handler_factory import DatasetHandlerFactory
from .random_number_dataset import RandomNumberDataSet
from .random_number_timeseries_dataset import RandomNumberTimeSeriesDataset
from .random_categorical_dataset import RandomCategoricalDataSet
from .sequence_number_timeseries_dataset import SequenceNumberTimeSeriesDataset
from .sequence_number_dataset import SequenceNumberDataSet
from .sequence_categorical_dataset import SequenceCategoricalDataSet
from .dataset_handler_factory import DatasetHandlerFactory
32 changes: 32 additions & 0 deletions src/pydatagenerator/data/impl/random_categorical_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import random
from pydatagenerator.data.abstract import AbstractDataSet

class RandomCategoricalDataSet(AbstractDataSet):
    """Data set that returns a randomly picked category on every call.

    The candidate values are read from the ``categories`` entry of the
    dataset info.
    """
    type = 'type.random-categorical-dataset'

    def required_fields(self):
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['categories']

    def optional_fields(self):
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return []

    def handle(self) -> object:
        """Picks one of the configured categories at random

        Returns:
            object: A randomly selected element of ``categories``

        Raises:
            ValueError: If ``categories`` is empty
        """
        categories = self._dataset_info['categories']
        if not categories:
            # Nothing to pick from: fail with a descriptive message instead of
            # letting the random module raise an opaque "empty range" error.
            raise ValueError("'categories' must contain at least one value")
        return random.choice(categories)
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def handle(self) -> object:
Returns:
object: The result obtained after processing the dataset_info
"""
self.validate_dataset_info()
is_floating = self._dataset_info['floating'] and self._dataset_info['floating'].lower() == 'true'
is_floating = 'floating' in self._dataset_info and self._dataset_info['floating'].lower() == 'true'
func = random.uniform if is_floating else random.randint
return func(int(self._dataset_info['min']), int(self._dataset_info['max']))
50 changes: 50 additions & 0 deletions src/pydatagenerator/data/impl/random_number_timeseries_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import random
from datetime import datetime
from pydatagenerator.data.abstract import AbstractDataSet

class RandomNumberTimeSeriesDataset(AbstractDataSet):
    """Data set that returns a random number paired with a random date.

    Every call to ``handle`` draws a value between ``min_value`` and
    ``max_value`` and a date between ``min_date`` and ``max_date``.
    """
    type = 'type.random-number-timeseries-dataset'

    def required_fields(self):
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['min_value', 'max_value', 'min_date', 'max_date']

    def optional_fields(self):
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return ['floating', 'date_format']

    def random_date(self, start: datetime, end: datetime) -> datetime:
        """Returns a random date between ``start`` and ``end`` (both inclusive)

        The result is ``start`` plus a whole number of seconds. The offset is
        added to ``start`` directly instead of going through a POSIX
        timestamp, so the host time zone cannot shift the result outside the
        requested range.

        Args:
            start (datetime): Lower bound of the range
            end (datetime): Upper bound of the range

        Returns:
            datetime: A date within the given range

        Raises:
            ValueError: If ``end`` precedes ``start`` by a second or more
        """
        # Local import: the module only imports ``datetime`` from the datetime package.
        from datetime import timedelta

        span_seconds = int((end - start).total_seconds())
        return start + timedelta(seconds=random.randint(0, span_seconds))

    def handle(self) -> object:
        """Generates one random ``(value, date)`` pair from the dataset info

        Returns:
            object: Tuple holding the random number and the random datetime
        """
        info = self._dataset_info
        is_floating = 'floating' in info and info['floating'].lower() == 'true'
        if is_floating:
            # Parse the bounds as floats so fractional limits such as "1.5" are accepted
            value = random.uniform(float(info['min_value']), float(info['max_value']))
        else:
            value = random.randint(int(info['min_value']), int(info['max_value']))
        # default format is iso-8601
        datetime_format = info['date_format'] if 'date_format' in info else '%Y-%m-%dT%H:%M:%SZ'
        min_date = datetime.strptime(info['min_date'], datetime_format)
        max_date = datetime.strptime(info['max_date'], datetime_format)
        # Format and re-parse so the result only carries the fields the format can express
        formatted = self.random_date(min_date, max_date).strftime(datetime_format)
        return (value, datetime.strptime(formatted, datetime_format))


45 changes: 45 additions & 0 deletions src/pydatagenerator/data/impl/sequence_categorical_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from typing import Dict
from pydatagenerator.data.abstract import AbstractDataSet

class SequenceCategoricalDataSet(AbstractDataSet):
    """Data set that walks through a list of categories in fixed steps.

    The position starts at ``start``, advances by ``increment`` on every call
    and wraps around the end of ``categories``.
    """
    type = 'type.sequence-categorical-dataset'

    def __init__(self, dataset_info: Dict[str, object]):
        """Creates the data set and positions it one step before ``start``

        Args:
            dataset_info (Dict[str, object]): Dataset attributes; ``start`` and
                ``increment`` are parsed as floats when ``floating`` is
                ``'true'`` (case-insensitive) and as integers otherwise
        """
        super().__init__(dataset_info)
        is_floating = 'floating' in dataset_info \
            and dataset_info['floating'].lower() == 'true'
        to_number = float if is_floating else int
        self.__start = to_number(dataset_info['start'])
        self.__increment = to_number(dataset_info['increment'])
        # Begin one step behind ``start`` so the first handle() call lands on it
        self.__pos = self.__start - self.__increment
        self.__categories = self.dataset_info['categories']

    def required_fields(self):
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['categories','start','increment']

    def optional_fields(self):
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return ['floating']

    def handle(self) -> object:
        """Advances the position by one step and returns the category found there

        A fractional position (possible when ``floating`` is enabled) is
        truncated to the index of the category it falls in.

        Returns:
            object: The category at the new position

        Raises:
            ValueError: If ``categories`` is empty
        """
        size = len(self.__categories)
        if not size:
            raise ValueError("'categories' must contain at least one value")
        self.__pos += self.__increment
        self.__pos %= size
        # A float cannot index a list, so truncate it first. The extra modulo
        # covers the float rounding case where ``pos % size`` equals ``size``.
        return self.__categories[int(self.__pos) % size]

Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from pydatagenerator.data.abstract import AbstractDataSet


class SequenceDataSet(AbstractDataSet):
"""SequenceDataSet
class SequenceNumberDataSet(AbstractDataSet):
"""SequenceNumberDataSet
"""
type = 'type.sequence-dataset'
type = 'type.sequence-number-dataset'

def required_fields(self) -> List[str]:
"""Returns the required fields for the current data set
Expand All @@ -21,19 +21,23 @@ def optional_fields(self) -> List[str]:
Returns:
List[str]: List of optional fields for the current data set
"""
return []
return ['floating']

def __init__(self, dataset_info: Dict[str, object]):
super().__init__(dataset_info)
self.__val = int(dataset_info['start']) - int(dataset_info['increment'])
is_floating = 'floating' in dataset_info \
and dataset_info['floating'].lower() == 'true'
self.__start = float(dataset_info['start']) if is_floating \
else int(dataset_info['start'])
self.__increment = float(dataset_info['increment']) if is_floating \
else int(dataset_info['increment'])
self.__val = self.__start - self.__increment

def handle(self) -> object:
"""Process the given dataset_info and returns a result out of it
Returns:
object: The result obtained after processing the dataset_info
"""
self.validate_dataset_info()
increment = int(self._dataset_info['increment'])
self.__val += increment
self.__val += self.__increment
return self.__val
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import re
from datetime import datetime, timedelta
from typing import Dict, List
from pydatagenerator.data.abstract import AbstractDataSet


class SequenceNumberTimeSeriesDataset(AbstractDataSet):
    """Data set that returns ``(value, date)`` pairs advancing in fixed steps.

    The value starts at ``start_value`` and grows by ``increment_value``; the
    date starts at ``start_date`` and grows by ``increment_date``.
    """
    type = 'type.sequence-number-timeseries-dataset'

    def required_fields(self) -> List[str]:
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['start_value', 'increment_value', 'start_date', 'increment_date']

    def optional_fields(self) -> List[str]:
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return ['date_format', 'floating']

    def __init__(self, dataset_info: Dict[str, object]):
        """Creates the data set and positions it one step before the start

        Args:
            dataset_info (Dict[str, object]): Dataset attributes; the numeric
                fields are parsed as floats when ``floating`` is ``'true'``
                (case-insensitive) and as integers otherwise

        Raises:
            ValueError: If ``increment_date`` cannot be parsed
        """
        super().__init__(dataset_info)
        self.__is_floating = 'floating' in dataset_info \
            and dataset_info['floating'].lower() == 'true'
        to_number = float if self.__is_floating else int
        self.__increment = to_number(dataset_info['increment_value'])
        self.__parsed_increment_date = self.timedelta_parse(dataset_info['increment_date'])
        # Start one step behind so the first handle() call returns the start itself
        self.__val = to_number(dataset_info['start_value']) - self.__increment
        # default format is iso-8601
        self.__date_format = dataset_info['date_format'] if 'date_format' in dataset_info \
            else '%Y-%m-%dT%H:%M:%SZ'
        self.__date = datetime.strptime(dataset_info['start_date'], self.__date_format) \
            - self.__parsed_increment_date

    def timedelta_parse(self, timedelta_str: str) -> timedelta:
        """Converts a ``days:hours:minutes:seconds`` string into a timedelta

        Leading fields may be omitted (``"30"`` is thirty seconds, ``"1:30"``
        one minute and thirty seconds). Every character other than digits,
        ``:`` and ``.`` is discarded before parsing, so signs and unit
        suffixes are ignored.

        Args:
            timedelta_str (str): Textual representation of the interval

        Returns:
            timedelta: The parsed interval

        Raises:
            ValueError: If nothing is left after filtering, or a field is not
                a valid number
        """
        value = re.sub(r'[^0-9:.]', "", timedelta_str)
        if not value:
            # Returning None here would only surface later as an opaque
            # TypeError when the interval is subtracted from the start date.
            raise ValueError(f"Cannot parse time interval from {timedelta_str!r}")
        units = ('seconds', 'minutes', 'hours', 'days')
        # Fields are matched right to left: the last one is always seconds
        return timedelta(**{
            unit: float(part) for part, unit in zip(reversed(value.split(':')), units)
        })

    def handle(self) -> object:
        """Advances the value and the date by one step and returns both

        Returns:
            object: Tuple holding the current value and the current datetime
        """
        self.__val += self.__increment
        self.__date += self.__parsed_increment_date
        return (self.__val, self.__date)


2 changes: 1 addition & 1 deletion src/pydatagenerator/xml/abstract/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from pydatagenerator.xml.abstract.abstract_xml_parser import AbstractXmlParser
from .abstract_xml_parser import AbstractXmlParser
4 changes: 2 additions & 2 deletions src/pydatagenerator/xml/impl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from pydatagenerator.xml.impl.xml_parser import XmlParser
from pydatagenerator.xml.impl.xml_parser_util import XmlParserUtil
from .xml_parser import XmlParser
from .xml_parser_util import XmlParserUtil
3 changes: 3 additions & 0 deletions src/pydatagenerator/xml/impl/xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ def parse_xml_from_string(self, xml_str: str) -> List[str]:
for _ in range(iterations):
for dataset in datasets:
dataset_info = XmlParserUtil.collect_attributes(dataset)
categories = dataset.xpath('//categories')
if categories:
dataset_info['categories'] = [category.get('value') for category in categories]
dataset_handler = handler_factory.get_dataset_handler(dataset_info)
name = dataset_info['name']
if name in data_info:
Expand Down
15 changes: 15 additions & 0 deletions tests/pydatagenerator/data/random_categorical_dataset_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pydatagenerator.data.impl.dataset_handler_factory import DatasetHandlerFactory

def test_random_categorical_dataset():
    """Two consecutive draws from the random categorical data set must each be one of the configured categories."""
    categories = ['red', 'green', 'blue']
    dataset_info = {
        'type': 'type.random-categorical-dataset',
        'name': 'colors',
        'categories': categories
    }
    handler = DatasetHandlerFactory().get_dataset_handler(dataset_info)

    for _ in range(2):
        assert handler.handle() in categories
Loading

0 comments on commit 3f62925

Please sign in to comment.