Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix numpy type hints + fix series_to_series transformers #8

Merged
merged 1 commit into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 30 additions & 25 deletions tsururu/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class IndexSlicer:
"""

@staticmethod
def timedelta(x: Tuple[NDArray[Union[np.int, np.float]], pd.Timedelta]):
def timedelta(x: Tuple[NDArray[Union[np.integer, np.floating]], pd.Timedelta]):
"""
Returns the difference between neighboring observations
in the array in terms of delta and the delta itself.
Expand Down Expand Up @@ -67,7 +67,7 @@ def get_cols_idx(data: pd.DataFrame, columns: List):
@staticmethod
def get_slice(
data: pd.DataFrame,
k: Tuple[NDArray[np.int], NDArray[np.int]],
k: Tuple[NDArray[int], NDArray[int]],
) -> pd.DataFrame:
"""Get 3d slice.

Expand Down Expand Up @@ -118,14 +118,14 @@ def ids_from_date(
return ids

def _rolling_window(
self, a: NDArray[np.float], window: int, step: int, from_last: bool = True
self, a: NDArray[np.floating], window: int, step: int, from_last: bool = True
):
sliding_window = np.lib.stride_tricks.sliding_window_view(a, window)
return sliding_window[(len(a) - window) % step if from_last else 0:][::step]
return sliding_window[(len(a) - window) % step if from_last else 0 :][::step]

def _create_idx_data(
self,
data: NDArray[np.float],
data: NDArray[np.floating],
horizon: int,
history: int,
step: int,
Expand All @@ -136,7 +136,7 @@ def _create_idx_data(

def _create_idx_target(
self,
data: NDArray[np.float],
data: NDArray[np.floating],
horizon: int,
history: int,
step: int,
Expand All @@ -149,23 +149,23 @@ def _create_idx_target(

def _create_idx_test(
self,
data: NDArray[np.float],
data: NDArray[np.floating],
horizon: int,
history: int,
step: int,
_,
__,
):
return self._rolling_window(np.arange(len(data)), history, step)[-(horizon + 1):-horizon]
return self._rolling_window(np.arange(len(data)), history, step)[-(horizon + 1) : -horizon]

def _get_ids(
self,
func,
data: NDArray[np.float],
data: NDArray[np.floating],
horizon: int,
history: int,
step: int,
ids: NDArray[np.int],
ids: NDArray[np.integer],
cond: int = 0,
n_last_horizon: Optional[int] = None,
):
Expand All @@ -185,11 +185,11 @@ def _get_ids(

def create_idx_data(
self,
data: NDArray[np.float],
data: NDArray[np.floating],
horizon: int,
history: int,
step: int,
ids: Optional[NDArray[np.int]] = None,
ids: Optional[NDArray[np.integer]] = None,
date_column: Optional[str] = None,
):
"""Find indices that, when applied to the original dataset,
Expand Down Expand Up @@ -224,11 +224,11 @@ def create_idx_data(

def create_idx_test(
self,
data: NDArray[np.float],
data: NDArray[np.floating],
horizon: int,
history: int,
step: int,
ids: Optional[NDArray[np.int]] = None,
ids: Optional[NDArray[np.integer]] = None,
date_column: Optional[str] = None,
):
"""Find indices that, when applied to the original dataset,
Expand Down Expand Up @@ -263,11 +263,11 @@ def create_idx_test(

def create_idx_target(
self,
data: NDArray[np.float],
data: NDArray[np.floating],
horizon: int,
history: int,
step: int,
ids: Optional[NDArray[np.int]] = None,
ids: Optional[NDArray[np.integer]] = None,
date_column: Optional[str] = None,
n_last_horizon: Optional[int] = None,
):
Expand Down Expand Up @@ -368,21 +368,21 @@ def make_padded_test(
"""

def _crop_segment(
segment: NDArray[Union[np.float, np.str]],
segment: NDArray[Union[np.floating, np.str_]],
test_last: bool,
) -> NDArray[Union[np.float, np.str]]:
) -> NDArray[Union[np.floating, np.str_]]:
if test_last:
return segment[-self.history:]
return segment[-self.history - horizon:-horizon]
return segment[-self.history :]
return segment[-self.history - horizon : -horizon]

def _pad_segment(
segment: NDArray[Union[np.float, np.str]],
segment: NDArray[Union[np.floating, np.str_]],
horizon: int,
time_delta: pd.Timedelta,
date_col_id: Optional[int],
id_col_id: Optional[Union[str, NDArray[np.str]]],
) -> NDArray[Union[np.float, np.str]]:
result = np.full((horizon, segment.shape[1]), None)
id_col_id: Optional[Union[str, NDArray[np.str_]]],
) -> NDArray[Union[np.floating, np.str_]]:
result = np.full((horizon, segment.shape[1]), np.nan)

last_date = segment[-1, date_col_id]
new_dates = pd.date_range(last_date + time_delta, periods=horizon, freq=time_delta)
Expand Down Expand Up @@ -420,4 +420,9 @@ def _pad_segment(

# Concatenate together
result = np.vstack(np.concatenate((segments, padded_segments_results), axis=1))
return pd.DataFrame(result, columns=columns)
result = pd.DataFrame(result, columns=columns)
result[self.date_column] = pd.to_datetime(result[self.date_column])
result[self.id_column] = result[self.id_column].astype("int")
other = [col for col in columns if col not in [self.id_column, self.date_column]]
result[other] = result[other].astype("float")
return result
8 changes: 4 additions & 4 deletions tsururu/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def initialize_validator(self):
cv = TimeSeriesSplit(n_splits=self.validation_params["n_splits"])
return cv

def fit(self, X: pd.DataFrame, y: NDArray[np.float]) -> None:
def fit(self, X: pd.DataFrame, y: NDArray[np.floating]) -> None:
"""Initialization and training of the model according to the
passed parameters.

Expand All @@ -80,7 +80,7 @@ def fit(self, X: pd.DataFrame, y: NDArray[np.float]) -> None:
"""
raise NotImplementedError()

def predict(self, X: pd.DataFrame) -> NDArray[np.float]:
def predict(self, X: pd.DataFrame) -> NDArray[np.floating]:
"""Obtaining model predictions.

Arguments:
Expand All @@ -101,7 +101,7 @@ def __init__(
):
super().__init__(get_num_iterations, validation_params, model_params)

def fit(self, X: pd.DataFrame, y: NDArray[np.float]) -> None:
def fit(self, X: pd.DataFrame, y: NDArray[np.floating]) -> None:
# Initialize cv object
cv = self.initialize_validator()

Expand Down Expand Up @@ -151,7 +151,7 @@ def fit(self, X: pd.DataFrame, y: NDArray[np.float]) -> None:
print(f"Mean {self.model_params['loss_function']}: {np.mean(self.scores).round(4)}")
print(f"Std: {np.std(self.scores).round(4)}")

def predict(self, X: pd.DataFrame) -> NDArray[np.float]:
def predict(self, X: pd.DataFrame) -> NDArray[np.floating]:
models_preds = [model.predict(X) for model in self.models]
y_pred = np.mean(models_preds, axis=0)
return y_pred
Expand Down
16 changes: 8 additions & 8 deletions tsururu/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ def _backtest_generator(

@staticmethod
def _make_multivariate_X_y(
X: pd.DataFrame, y: NDArray[np.float]
) -> Tuple[pd.DataFrame, NDArray[np.float]]:
X: pd.DataFrame, y: NDArray[np.floating]
) -> Tuple[pd.DataFrame, NDArray[np.floating]]:
raise NotImplementedError()

def _generate_X_y(
Expand All @@ -162,7 +162,7 @@ def _generate_X_y(
target_horizon: int,
is_train: bool,
history: str = None,
idx: Optional[NDArray[np.float]] = None,
idx: Optional[NDArray[np.floating]] = None,
n_last_horizon: Optional[int] = None,
X_only: bool = False,
):
Expand All @@ -177,7 +177,7 @@ def fit(self, dataset: TSDataset):

def back_test(
self, dataset: TSDataset, cv: int
) -> Union[List, NDArray[Union[np.float, np.str]]]:
) -> Union[List, NDArray[Union[np.floating, np.str_]]]:
ids_list = []
test_list = []
preds_list = []
Expand Down Expand Up @@ -221,7 +221,7 @@ def back_test(
)

@timing_decorator
def predict(self, dataset: TSDataset) -> NDArray[np.float]:
def predict(self, dataset: TSDataset) -> NDArray[np.floating]:
raise NotImplementedError()


Expand Down Expand Up @@ -283,7 +283,7 @@ def _make_multivariate_X_y(
self,
X: pd.DataFrame,
date_column: "str",
y: Optional[NDArray[np.float]] = None,
y: Optional[NDArray[np.floating]] = None,
):
idx_slicer = IndexSlicer()

Expand Down Expand Up @@ -337,7 +337,7 @@ def _generate_X_y(
target_horizon: int,
is_train: bool,
history: str = None,
idx: Optional[NDArray[np.float]] = None,
idx: Optional[NDArray[np.floating]] = None,
n_last_horizon: Optional[int] = None,
X_only: bool = False,
):
Expand Down Expand Up @@ -1148,7 +1148,7 @@ def _generate_X_y(
target_horizon: int,
is_train: bool,
history: str = None,
idx: Optional[NDArray[np.float]] = None,
idx: Optional[NDArray[np.floating]] = None,
n_last_horizon: Optional[int] = None,
X_only: bool = False,
):
Expand Down
24 changes: 10 additions & 14 deletions tsururu/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@ def transform(
X_only: bool,
) -> Tuple[pd.DataFrame]:
if self.transform_train:
raw_ts_X = raw_ts_X.groupby(self.id_column).apply(self._transform_segment)
raw_ts_X = raw_ts_X.groupby(self.id_column).apply(self._transform_segment).reset_index(level=self.id_column, drop=True)
if self.transform_target and not X_only:
raw_ts_y = raw_ts_y.groupby(self.id_column).apply(self._transform_segment)
raw_ts_y = raw_ts_y.groupby(self.id_column).apply(self._transform_segment).reset_index(level=self.id_column, drop=True)
return raw_ts_X, raw_ts_y, features_X, y

def inverse_transform_y(self, y: pd.DataFrame) -> pd.DataFrame:
return y.groupby(self.id_column).apply(self._inverse_transform_segment)
return y.groupby(self.id_column).apply(self._inverse_transform_segment).reset_index(level=self.id_column, drop=True)


class FeaturesToFeaturesTransformer(SeriesToFeaturesTransformer):
Expand Down Expand Up @@ -283,7 +283,7 @@ def fit(
column
for column in self.columns
if issubclass(raw_ts_X[column].dtype.type, np.integer)
or issubclass(raw_ts_X[column].dtype.type, np.float)
or issubclass(raw_ts_X[column].dtype.type, np.floating)
]
stat_df = raw_ts_X.groupby(id_column)[self.columns].agg(["mean", "std"])
self.params = stat_df.to_dict(orient="index")
Expand All @@ -297,7 +297,7 @@ class DifferenceNormalizer(SeriesToSeriesTransformer):
type: "delta" to take the difference or "ratio" -- ratio
between the current and the previous value.

self.params: dict with first values by each id
self.params: dict with last values by each id (for targets' inverse transform)
"""

def __init__(self, regime: str = "delta"):
Expand Down Expand Up @@ -327,13 +327,9 @@ def _inverse_transform_segment(self, segment: pd.Series) -> pd.Series:
current_columns_mask = [segment.columns.str.contains(current_column_name)][0]
current_last_value = self.params[current_id][current_column_name]
if self.type == "delta":
segment.loc[:, current_columns_mask] = (
segment.loc[:, current_columns_mask] + current_last_value
)
segment.loc[:, current_columns_mask] = np.cumsum(np.append(current_last_value, segment.loc[:, current_columns_mask].values))[1:]
if self.type == "ratio":
segment.loc[:, current_columns_mask] = (
segment.loc[:, current_columns_mask] * current_last_value
)
segment.loc[:, current_columns_mask] = np.cumprod(np.append(current_last_value, segment.loc[:, current_columns_mask].values))[1:]
return segment

def fit(
Expand Down Expand Up @@ -361,7 +357,7 @@ def fit(
column
for column in self.columns
if issubclass(raw_ts_X[column].dtype.type, np.integer)
or issubclass(raw_ts_X[column].dtype.type, np.float)
or issubclass(raw_ts_X[column].dtype.type, np.floating)
]
last_values_df = raw_ts_X.groupby(self.id_column)[self.columns].last()
self.params = last_values_df.to_dict(orient="index")
Expand Down Expand Up @@ -438,7 +434,7 @@ def transform(
features_X[columns_to_transform] - last_values
)
if self.transform_target and not X_only:
y.loc[:, column_name] = y[column_name] - last_values
y = y - last_values
elif self.regime == "ratio":
if self.transform_train:
features_X.loc[:, columns_to_transform] = (
Expand Down Expand Up @@ -656,7 +652,7 @@ def __init__(
self,
lags: Union[int, List[int], np.ndarray],
drop_raw_features: bool,
idx_data: NDArray[np.float],
idx_data: NDArray[np.floating],
):
super().__init__()
if isinstance(lags, list):
Expand Down