-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from alexprodan99/apd/new-rules
add more datasets
- Loading branch information
Showing
22 changed files
with
400 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,23 @@ | ||
# Changelog | ||
|
||
## [0.0.2] - 2024-05-31 | ||
### Added | ||
- random categorical dataset | ||
- random number timeseries dataset | ||
- sequence categorical dataset | ||
- sequence number timeseries dataset | ||
|
||
### Changed | ||
- updated the sequence number dataset to include support for floating-point numbers | ||
- updated the publish pipeline to run on multiple operating systems | ||
- updated the publish pipeline to generate an executable for generating template data | ||
|
||
## [0.0.1] - 2024-05-18 | ||
### Added | ||
- Initial project setup | ||
- v0.0.1 dataset contract | ||
- v0.0.1 random number dataset | ||
- v0.0.1 sequence dataset | ||
|
||
|
||
|
||
[0.0.2]: https://github.com/alexprodan99/pydatagenerator/compare/v0.0.1...v0.0.2 | ||
[0.0.1]: https://github.com/alexprodan99/pydatagenerator/releases/tag/v0.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
from pydatagenerator.data.abstract.abstract_dataset import AbstractDataSet, DataSetConstants | ||
from pydatagenerator.data.abstract.abstract_dataset_handler_factory import AbstractDataSetHandler | ||
from .abstract_dataset import AbstractDataSet, DataSetConstants | ||
from .abstract_dataset_handler_factory import AbstractDataSetHandler |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,7 @@ | ||
from pydatagenerator.data.impl.random_number_data_set import RandomNumberDataSet | ||
from pydatagenerator.data.impl.sequence_data_set import SequenceDataSet | ||
from pydatagenerator.data.impl.dataset_handler_factory import DatasetHandlerFactory | ||
from .random_number_dataset import RandomNumberDataSet | ||
from .random_number_timeseries_dataset import RandomNumberTimeSeriesDataset | ||
from .random_categorical_dataset import RandomCategoricalDataSet | ||
from .sequence_number_timeseries_dataset import SequenceNumberTimeSeriesDataset | ||
from .sequence_number_dataset import SequenceNumberDataSet | ||
from .sequence_categorical_dataset import SequenceCategoricalDataSet | ||
from .dataset_handler_factory import DatasetHandlerFactory |
32 changes: 32 additions & 0 deletions
32
src/pydatagenerator/data/impl/random_categorical_dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import random | ||
from pydatagenerator.data.abstract import AbstractDataSet | ||
|
||
class RandomCategoricalDataSet(AbstractDataSet):
    """Data set that returns a randomly chosen category on every call.

    The candidate values are read from the ``categories`` entry of the
    data set description.
    """
    type = 'type.random-categorical-dataset'

    def required_fields(self):
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['categories']

    def optional_fields(self):
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return []

    def handle(self) -> object:
        """Pick one of the configured categories at random.

        Returns:
            object: One element of ``categories``, chosen uniformly.

        Raises:
            ValueError: If ``categories`` is empty.
        """
        categories = self._dataset_info['categories']
        if not categories:
            # Fail with an explicit message instead of the opaque
            # "empty range" error raised by the random module.
            raise ValueError("'categories' must contain at least one value")
        return random.choice(categories)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
src/pydatagenerator/data/impl/random_number_timeseries_dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import random | ||
from datetime import datetime | ||
from pydatagenerator.data.abstract import AbstractDataSet | ||
|
||
class RandomNumberTimeSeriesDataset(AbstractDataSet):
    """Data set that yields a random number paired with a random date.

    Every call to ``handle`` draws a value between ``min_value`` and
    ``max_value`` and a date between ``min_date`` and ``max_date``.
    """
    type = 'type.random-number-timeseries-dataset'

    def required_fields(self):
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['min_value', 'max_value', 'min_date', 'max_date']

    def optional_fields(self):
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return ['floating', 'date_format']

    def random_date(self, start: datetime, end: datetime):
        """Return a random naive datetime between ``start`` and ``end``.

        Both bounds are inclusive and the result has whole-second
        resolution (sub-second parts of the bounds are truncated).

        Args:
            start (datetime): Lower bound (naive datetime).
            end (datetime): Upper bound (naive datetime).

        Returns:
            datetime: A naive datetime within ``[start, end]``.
        """
        # Imported here because the module header only imports ``datetime``.
        from datetime import timedelta

        epoch = datetime(1970, 1, 1)
        start_seconds = int((start - epoch).total_seconds())
        end_seconds = int((end - epoch).total_seconds())
        dt_seconds = random.randint(start_seconds, end_seconds)
        # Add the offset back onto the same naive epoch it was measured from.
        # ``datetime.fromtimestamp`` would interpret it as a POSIX timestamp
        # and convert to local time, shifting the result by the machine's
        # UTC offset and possibly pushing it outside [start, end].
        return epoch + timedelta(seconds=dt_seconds)

    def handle(self) -> object:
        """Generate one ``(value, datetime)`` sample.

        When the optional ``floating`` field equals ``'true'``
        (case-insensitive) the bounds are parsed as floats and the value is
        drawn with ``random.uniform``; otherwise the bounds are parsed as
        integers and the value is drawn with ``random.randint``.

        Returns:
            object: A ``(value, datetime)`` tuple.
        """
        info = self._dataset_info
        is_floating = 'floating' in info and info['floating'].lower() == 'true'
        if is_floating:
            # Parse as float so fractional bounds such as '1.5' are accepted.
            value = random.uniform(float(info['min_value']), float(info['max_value']))
        else:
            value = random.randint(int(info['min_value']), int(info['max_value']))

        # default format is iso-8601
        datetime_format = info['date_format'] if 'date_format' in info else '%Y-%m-%dT%H:%M:%SZ'
        min_date = datetime.strptime(info['min_date'], datetime_format)
        max_date = datetime.strptime(info['max_date'], datetime_format)
        # Round-trip through the format so the returned datetime only keeps
        # the fields that the configured format can express.
        date = self.random_date(min_date, max_date).strftime(datetime_format)
        datetime_value = datetime.strptime(date, datetime_format)
        return (value, datetime_value)
|
||
|
45 changes: 45 additions & 0 deletions
45
src/pydatagenerator/data/impl/sequence_categorical_dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from typing import Dict | ||
from pydatagenerator.data.abstract import AbstractDataSet | ||
|
||
class SequenceCategoricalDataSet(AbstractDataSet):
    """Data set that walks through the configured categories in order.

    The walk starts at index ``start`` and advances by ``increment`` on
    every call, wrapping around at the end of ``categories``.
    """
    type = 'type.sequence-categorical-dataset'

    def __init__(self, dataset_info: Dict[str, object]):
        """Read the sequence parameters from ``dataset_info``.

        Args:
            dataset_info (Dict[str, object]): Data set description holding
                ``categories``, ``start``, ``increment`` and optionally
                ``floating``.
        """
        super().__init__(dataset_info)
        is_floating = 'floating' in dataset_info \
            and dataset_info['floating'].lower() == 'true'
        parse = float if is_floating else int
        self.__start = parse(dataset_info['start'])
        self.__increment = parse(dataset_info['increment'])
        # Start one step "before" the first element so that the first call
        # to handle() lands exactly on ``start``.
        self.__pos = self.__start - self.__increment
        # Read from the constructor argument, like every other field here.
        self.__categories = dataset_info['categories']

    def required_fields(self):
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['categories','start','increment']

    def optional_fields(self):
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return ['floating']

    def handle(self) -> object:
        """Advance the position and return the category found there.

        With ``floating`` enabled the position may be fractional; it is
        truncated to an integer index, so an increment of ``0.5`` returns
        every category twice before moving on.

        Returns:
            object: The category at the current position.
        """
        size = len(self.__categories)
        self.__pos = (self.__pos + self.__increment) % size
        # A float position cannot index a list directly. The extra modulo
        # guards against float rounding yielding exactly ``size``.
        return self.__categories[int(self.__pos) % size]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
62 changes: 62 additions & 0 deletions
62
src/pydatagenerator/data/impl/sequence_number_timeseries_dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import re | ||
from datetime import datetime, timedelta | ||
from typing import Dict, List | ||
from pydatagenerator.data.abstract import AbstractDataSet | ||
|
||
|
||
class SequenceNumberTimeSeriesDataset(AbstractDataSet):
    """Data set that yields an arithmetic sequence of numbers with dates.

    Every call to ``handle`` advances the number by ``increment_value`` and
    the date by ``increment_date``; the first call returns ``start_value``
    and ``start_date``.
    """
    type = 'type.sequence-number-timeseries-dataset'

    def required_fields(self) -> List[str]:
        """Returns the required fields for the current data set

        Returns:
            List[str]: List of required fields for the current data set
        """
        return ['start_value', 'increment_value', 'start_date', 'increment_date']

    def optional_fields(self) -> List[str]:
        """Returns the optional fields for the current data set

        Returns:
            List[str]: List of optional fields for the current data set
        """
        return ['date_format', 'floating']

    def __init__(self, dataset_info: Dict[str, object]):
        """Read the sequence parameters from ``dataset_info``.

        Args:
            dataset_info (Dict[str, object]): Data set description holding
                the required fields and optionally ``date_format`` and
                ``floating``.

        Raises:
            ValueError: If ``increment_date`` contains no digits and thus
                cannot be turned into a time step.
        """
        super().__init__(dataset_info)
        self.__is_floating = 'floating' in dataset_info \
            and dataset_info['floating'].lower() == 'true'
        parse_number = float if self.__is_floating else int
        self.__increment = parse_number(dataset_info['increment_value'])

        self.__parsed_increment_date = self.timedelta_parse(dataset_info['increment_date'])
        if self.__parsed_increment_date is None:
            # Without this check the date arithmetic below fails with an
            # unhelpful "unsupported operand" TypeError.
            raise ValueError(
                "'increment_date' must contain a duration such as 'HH:MM:SS', got %r"
                % (dataset_info['increment_date'],)
            )

        # Start one step "before" the configured start so that the first
        # call to handle() returns exactly start_value / start_date.
        self.__val = parse_number(dataset_info['start_value']) - self.__increment
        self.__date_format = dataset_info['date_format'] if 'date_format' in dataset_info \
            else '%Y-%m-%dT%H:%M:%SZ'
        self.__date = datetime.strptime(dataset_info['start_date'], self.__date_format) \
            - self.__parsed_increment_date

    def timedelta_parse(self, timedelta_str: str) -> timedelta:
        """Parse a colon separated duration such as ``'1:02:03:04'``.

        Every character other than digits, ``:`` and ``.`` is discarded
        (so a leading minus sign is dropped). The remaining fields are read
        from right to left as seconds, minutes, hours and days; fields
        beyond the fourth are ignored.

        Args:
            timedelta_str (str): Textual duration.

        Returns:
            timedelta: The parsed duration, or ``None`` when nothing is
            left after discarding the unsupported characters.
        """
        value = re.sub(r'[^0-9:.]', "", timedelta_str)
        if not value:
            return None
        units = ('seconds', 'minutes', 'hours', 'days')
        fields = value.split(':')[::-1]
        return timedelta(**{unit: float(field) for field, unit in zip(fields, units)})

    def handle(self) -> object:
        """Advance the sequence by one step and return the new sample.

        Returns:
            object: A ``(value, datetime)`` tuple for the current step.
        """
        self.__val += self.__increment
        self.__date += self.__parsed_increment_date
        return (self.__val, self.__date)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
from pydatagenerator.xml.abstract.abstract_xml_parser import AbstractXmlParser | ||
from .abstract_xml_parser import AbstractXmlParser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
from pydatagenerator.xml.impl.xml_parser import XmlParser | ||
from pydatagenerator.xml.impl.xml_parser_util import XmlParserUtil | ||
from .xml_parser import XmlParser | ||
from .xml_parser_util import XmlParserUtil |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
tests/pydatagenerator/data/random_categorical_dataset_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from pydatagenerator.data.impl.dataset_handler_factory import DatasetHandlerFactory | ||
|
||
def test_random_categorical_dataset():
    """Values produced by the random categorical handler are configured categories."""
    categories = ['red', 'green', 'blue']
    dataset_info = {
        'type': 'type.random-categorical-dataset',
        'name': 'colors',
        'categories': categories,
    }
    handler = DatasetHandlerFactory().get_dataset_handler(dataset_info)

    # Draw twice from the same handler, checking each result.
    for _ in range(2):
        drawn = handler.handle()
        assert drawn in categories
Oops, something went wrong.