From 1a71969b252c460382b971f694e3936fa5936f35 Mon Sep 17 00:00:00 2001 From: Nils Braun Date: Sun, 14 Jul 2024 13:16:56 +0200 Subject: [PATCH] Fix code and tests for numpy >= 2.0 (#1085) --- tests/units/feature_extraction/test_data.py | 10 +-- .../test_feature_calculations.py | 62 +++++++++---------- .../units/feature_extraction/test_settings.py | 2 +- tests/units/feature_selection/test_checks.py | 4 +- .../transformers/test_per_column_imputer.py | 24 +++---- .../utilities/test_dataframe_functions.py | 38 ++++++------ .../feature_extraction/feature_calculators.py | 56 ++++++++--------- tsfresh/feature_selection/relevance.py | 2 +- tsfresh/utilities/dataframe_functions.py | 6 +- tsfresh/utilities/string_manipulation.py | 6 +- 10 files changed, 103 insertions(+), 107 deletions(-) diff --git a/tests/units/feature_extraction/test_data.py b/tests/units/feature_extraction/test_data.py index 124385ff..531c550d 100644 --- a/tests/units/feature_extraction/test_data.py +++ b/tests/units/feature_extraction/test_data.py @@ -457,7 +457,7 @@ def test_f(chunk): ) def test_with_wrong_input(self): - test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": np.NaN}]) + test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": np.nan}]) self.assertRaises(ValueError, to_tsdata, test_df, "id", "kind", "value", "sort") test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": 1}]) @@ -489,10 +489,10 @@ def test_with_wrong_input(self): ValueError, to_tsdata, test_df, "id", "strange_kind", "value", "sort" ) - test_df = pd.DataFrame([{"id": np.NaN, "kind": "a", "value": 3, "sort": 1}]) + test_df = pd.DataFrame([{"id": np.nan, "kind": "a", "value": 3, "sort": 1}]) self.assertRaises(ValueError, to_tsdata, test_df, "id", "kind", "value", "sort") - test_df = pd.DataFrame([{"id": 0, "kind": np.NaN, "value": 3, "sort": 1}]) + test_df = pd.DataFrame([{"id": 0, "kind": np.nan, "value": 3, "sort": 1}]) self.assertRaises(ValueError, to_tsdata, test_df, "id", "kind", "value", "sort") test_df = pd.DataFrame([{"id": 2}, {"id": 1}]) @@ -518,10 +518,10 @@ def test_with_wrong_input(self): # If there are more than one column, the algorithm can not choose the correct column self.assertRaises(ValueError, to_tsdata, test_dict, "id", None, None, None) - test_df = pd.DataFrame([{"id": 0, "value": np.NaN}]) + test_df = pd.DataFrame([{"id": 0, "value": np.nan}]) self.assertRaises(ValueError, to_tsdata, test_df, "id", None, "value", None) - test_df = pd.DataFrame([{"id": 0, "value": np.NaN}]) + test_df = pd.DataFrame([{"id": 0, "value": np.nan}]) self.assertRaises(ValueError, to_tsdata, test_df, None, None, "value", None) test_df = pd.DataFrame([{"id": 0, "a_": 3, "b": 5, "sort": 1}]) diff --git a/tests/units/feature_extraction/test_feature_calculations.py b/tests/units/feature_extraction/test_feature_calculations.py index 9a59d0d3..467d1cac 100644 --- a/tests/units/feature_extraction/test_feature_calculations.py +++ b/tests/units/feature_extraction/test_feature_calculations.py @@ -28,7 +28,7 @@ def tearDown(self): warnings.resetwarnings() def assertIsNaN(self, result): - self.assertTrue(np.isnan(result), msg="{} is not np.NaN") + self.assertTrue(np.isnan(result), msg="{} is not np.nan") def assertEqualOnAllArrayTypes(self, f, input_to_f, result, *args, **kwargs): expected_result = f(input_to_f, *args, **kwargs) @@ -575,7 +575,7 @@ def test_length(self): self.assertEqualOnAllArrayTypes(length, [1, 2, 3, 4], 4) self.assertEqualOnAllArrayTypes(length, [1, 2, 3], 3) self.assertEqualOnAllArrayTypes(length, [1, 2], 2) - self.assertEqualOnAllArrayTypes(length, [1, 2, 3, np.NaN], 4) + self.assertEqualOnAllArrayTypes(length, [1, 2, 3, np.nan], 4) self.assertEqualOnAllArrayTypes(length, [], 0) def test_standard_deviation(self): @@ -1180,26 +1180,26 @@ def test_binned_entropy(self): self.assertAlmostEqualOnAllArrayTypes( binned_entropy, [10] * 10 + [1], - -(10 / 11 * np.math.log(10 / 11) + 1 / 11 * np.math.log(1 / 11)), + -(10 / 11 * math.log(10 / 11) + 1 / 11 * math.log(1 / 11)), 10, ) self.assertAlmostEqualOnAllArrayTypes( binned_entropy, [10] * 10 + [1], - -(10 / 11 * np.math.log(10 / 11) + 1 / 11 * np.math.log(1 / 11)), + -(10 / 11 * math.log(10 / 11) + 1 / 11 * math.log(1 / 11)), 10, ) self.assertAlmostEqualOnAllArrayTypes( binned_entropy, [10] * 10 + [1], - -(10 / 11 * np.math.log(10 / 11) + 1 / 11 * np.math.log(1 / 11)), + -(10 / 11 * math.log(10 / 11) + 1 / 11 * math.log(1 / 11)), 100, ) self.assertAlmostEqualOnAllArrayTypes( - binned_entropy, list(range(10)), -np.math.log(1 / 10), 100 + binned_entropy, list(range(10)), -math.log(1 / 10), 100 ) self.assertAlmostEqualOnAllArrayTypes( - binned_entropy, list(range(100)), -np.math.log(1 / 2), 2 + binned_entropy, list(range(100)), -math.log(1 / 2), 2 ) def test_sample_entropy(self): @@ -1512,24 +1512,22 @@ def test_value_count(self): self.assertEqualPandasSeriesWrapper(value_count, [1] * 10, 10, value=1) self.assertEqualPandasSeriesWrapper(value_count, list(range(10)), 1, value=0) self.assertEqualPandasSeriesWrapper(value_count, [1] * 10, 0, value=0) - self.assertEqualPandasSeriesWrapper(value_count, [np.NaN, 0, 1] * 3, 3, value=0) + self.assertEqualPandasSeriesWrapper(value_count, [np.nan, 0, 1] * 3, 3, value=0) self.assertEqualPandasSeriesWrapper( - value_count, [np.NINF, 0, 1] * 3, 3, value=0 - ) - self.assertEqualPandasSeriesWrapper( - value_count, [np.PINF, 0, 1] * 3, 3, value=0 + value_count, [-np.inf, 0, 1] * 3, 3, value=0 ) + self.assertEqualPandasSeriesWrapper(value_count, [np.inf, 0, 1] * 3, 3, value=0) self.assertEqualPandasSeriesWrapper( value_count, [0.1, 0.2, 0.3] * 3, 3, value=0.2 ) self.assertEqualPandasSeriesWrapper( - value_count, [np.NaN, 0, 1] * 3, 3, value=np.NaN + value_count, [np.nan, 0, 1] * 3, 3, value=np.nan ) self.assertEqualPandasSeriesWrapper( - value_count, [np.NINF, 0, 1] * 3, 3, value=np.NINF + value_count, [-np.inf, 0, 1] * 3, 3, value=-np.inf ) self.assertEqualPandasSeriesWrapper( - value_count, [np.PINF, 0, 1] * 3, 3, value=np.PINF + value_count, [np.inf, 0, 1] * 3, 3, value=np.inf ) def test_range_count(self): @@ -1546,7 +1544,7 @@ def test_range_count(self): range_count, list(range(0, -10, -1)), 9, min=-10, max=0 ) self.assertEqualPandasSeriesWrapper( - range_count, [np.NaN, np.PINF, np.NINF] + list(range(10)), 10, min=0, max=10 + range_count, [np.nan, np.inf, -np.inf] + list(range(10)), 10, min=0, max=10 ) def test_approximate_entropy(self): @@ -1685,7 +1683,7 @@ def test__aggregate_on_chunks(self): ) self.assertListEqual( _aggregate_on_chunks( - x=pd.Series([0, 1, 2, np.NaN, 5]), f_agg="median", chunk_len=2 + x=pd.Series([0, 1, 2, np.nan, 5]), f_agg="median", chunk_len=2 ), [0.5, 2, 5], ) @@ -1728,7 +1726,7 @@ def test_agg_linear_trend(self): self.assertAlmostEqual(res['attr_"intercept"__chunk_len_3__f_agg_"median"'], 1) self.assertAlmostEqual(res['attr_"slope"__chunk_len_3__f_agg_"median"'], 3) - x = pd.Series([np.NaN, np.NaN, np.NaN, -3, -3, -3]) + x = pd.Series([np.nan, np.nan, np.nan, -3, -3, -3]) res = agg_linear_trend(x=x, param=param) res = pd.Series(dict(res)) @@ -1742,7 +1740,7 @@ def test_agg_linear_trend(self): self.assertIsNaN(res['attr_"intercept"__chunk_len_3__f_agg_"median"']) self.assertIsNaN(res['attr_"slope"__chunk_len_3__f_agg_"median"']) - x = pd.Series([np.NaN, np.NaN, -3, -3, -3, -3]) + x = pd.Series([np.nan, np.nan, -3, -3, -3, -3]) res = agg_linear_trend(x=x, param=param) res = pd.Series(dict(res)) @@ -1946,19 +1944,19 @@ def test_count_above(self): self.assertEqualPandasSeriesWrapper( count_above, [0.1, 0.2, 0.3] * 3, 2 / 3, t=0.2 ) - self.assertEqualPandasSeriesWrapper(count_above, [np.NaN, 0, 1] * 3, 2 / 3, t=0) + self.assertEqualPandasSeriesWrapper(count_above, [np.nan, 0, 1] * 3, 2 / 3, t=0) self.assertEqualPandasSeriesWrapper( - count_above, [np.NINF, 0, 1] * 3, 2 / 3, t=0 + count_above, [-np.inf, 0, 1] * 3, 2 / 3, t=0 ) - self.assertEqualPandasSeriesWrapper(count_above, [np.PINF, 0, 1] * 3, 1, t=0) + self.assertEqualPandasSeriesWrapper(count_above, [np.inf, 0, 1] * 3, 1, t=0) self.assertEqualPandasSeriesWrapper( - count_above, [np.NaN, 0, 1] * 3, 0, t=np.NaN + count_above, [np.nan, 0, 1] * 3, 0, t=np.nan ) self.assertEqualPandasSeriesWrapper( - count_above, [np.NINF, 0, np.PINF] * 3, 1, t=np.NINF + count_above, [-np.inf, 0, np.inf] * 3, 1, t=-np.inf ) self.assertEqualPandasSeriesWrapper( - count_above, [np.PINF, 0, 1] * 3, 1 / 3, t=np.PINF + count_above, [np.inf, 0, 1] * 3, 1 / 3, t=np.inf ) def test_count_below(self): @@ -1968,21 +1966,19 @@ def test_count_below(self): self.assertEqualPandasSeriesWrapper( count_below, [0.1, 0.2, 0.3] * 3, 2 / 3, t=0.2 ) - self.assertEqualPandasSeriesWrapper(count_below, [np.NaN, 0, 1] * 3, 1 / 3, t=0) - self.assertEqualPandasSeriesWrapper( - count_below, [np.NINF, 0, 1] * 3, 2 / 3, t=0 - ) + self.assertEqualPandasSeriesWrapper(count_below, [np.nan, 0, 1] * 3, 1 / 3, t=0) self.assertEqualPandasSeriesWrapper( - count_below, [np.PINF, 0, 1] * 3, 1 / 3, t=0 + count_below, [-np.inf, 0, 1] * 3, 2 / 3, t=0 ) + self.assertEqualPandasSeriesWrapper(count_below, [np.inf, 0, 1] * 3, 1 / 3, t=0) self.assertEqualPandasSeriesWrapper( - count_below, [np.NaN, 0, 1] * 3, 0, t=np.NaN + count_below, [np.nan, 0, 1] * 3, 0, t=np.nan ) self.assertEqualPandasSeriesWrapper( - count_below, [np.NINF, 0, np.PINF] * 3, 1 / 3, t=np.NINF + count_below, [-np.inf, 0, np.inf] * 3, 1 / 3, t=-np.inf ) self.assertEqualPandasSeriesWrapper( - count_below, [np.PINF, 0, 1] * 3, 1, t=np.PINF + count_below, [np.inf, 0, 1] * 3, 1, t=np.inf ) def test_benford_correlation(self): diff --git a/tests/units/feature_extraction/test_settings.py b/tests/units/feature_extraction/test_settings.py index 36f49001..bb0d4c12 100644 --- a/tests/units/feature_extraction/test_settings.py +++ b/tests/units/feature_extraction/test_settings.py @@ -92,7 +92,7 @@ def test_from_column_correct_for_selected_columns(self): self.assertEqual( kind_to_fc_parameters[tsn]["value_count"], - [{"value": np.PINF}, {"value": np.NINF}, {"value": np.NaN}], + [{"value": np.inf}, {"value": -np.inf}, {"value": np.nan}], ) def test_from_column_correct_for_comprehensive_fc_parameters(self): diff --git a/tests/units/feature_selection/test_checks.py b/tests/units/feature_selection/test_checks.py index ff05f42b..b5460fce 100644 --- a/tests/units/feature_selection/test_checks.py +++ b/tests/units/feature_selection/test_checks.py @@ -18,12 +18,12 @@ @pytest.fixture() def binary_series_with_nan(): - return pd.Series([np.NaN, 1, 1]) + return pd.Series([np.nan, 1, 1]) @pytest.fixture() def real_series_with_nan(): - return pd.Series([np.NaN, 1, 2]) + return pd.Series([np.nan, 1, 2]) @pytest.fixture() diff --git a/tests/units/transformers/test_per_column_imputer.py b/tests/units/transformers/test_per_column_imputer.py index 73a787b0..343f7a32 100644 --- a/tests/units/transformers/test_per_column_imputer.py +++ b/tests/units/transformers/test_per_column_imputer.py @@ -32,8 +32,8 @@ def test_only_nans_and_infs(self): X = pd.DataFrame(index=list(range(100))) X["NaNs"] = np.nan * np.ones(100) - X["PINF"] = np.PINF * np.ones(100) - X["NINF"] = np.NINF * np.ones(100) + X["PINF"] = np.inf * np.ones(100) + X["NINF"] = -np.inf * np.ones(100) with warnings.catch_warnings(record=True) as w: imputer.fit(X) @@ -53,8 +53,8 @@ def test_with_numpy_array(self): X = pd.DataFrame(index=list(range(100))) X["NaNs"] = np.nan * np.ones(100) - X["PINF"] = np.PINF * np.ones(100) - X["NINF"] = np.NINF * np.ones(100) + X["PINF"] = np.inf * np.ones(100) + X["NINF"] = -np.inf * np.ones(100) X_numpy = X.values.copy() @@ -87,7 +87,7 @@ def test_with_numpy_array(self): def test_standard_replacement_behavior(self): imputer = PerColumnImputer() - data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0] + data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0] truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0] X = pd.DataFrame({"a": data}) true_X = pd.DataFrame({"a": truth}) @@ -98,7 +98,7 @@ def test_standard_replacement_behavior(self): pdt.assert_frame_equal(selected_X, true_X) def test_partial_preset_col_to_NINF_given(self): - data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0] + data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0] truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0] X = pd.DataFrame({"a": data}) true_X = pd.DataFrame({"a": truth}) @@ -112,7 +112,7 @@ def test_partial_preset_col_to_NINF_given(self): pdt.assert_frame_equal(selected_X, true_X) def test_partial_preset_col_to_PINF_given(self): - data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0] + data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0] truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0] X = pd.DataFrame({"a": data}) true_X = pd.DataFrame({"a": truth}) @@ -126,7 +126,7 @@ def test_partial_preset_col_to_PINF_given(self): pdt.assert_frame_equal(selected_X, true_X) def test_partial_preset_col_to_NAN_given(self): - data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0] + data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0] truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0] X = pd.DataFrame({"a": data}) true_X = pd.DataFrame({"a": truth}) @@ -151,7 +151,7 @@ def test_different_shapes_fitted_and_transformed(self): self.assertRaises(ValueError, imputer.transform, X) def test_preset_has_higher_priority_than_fit(self): - data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0] + data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0] truth = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0] X = pd.DataFrame({"a": data}) @@ -166,8 +166,8 @@ def test_preset_has_higher_priority_than_fit(self): pdt.assert_frame_equal(selected_X, true_X) def test_only_parameters_of_last_fit_count(self): - data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0] - data_2 = [np.NINF, np.PINF, np.nan, 10.0, -10.0, 3.0, 3.0] + data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0] + data_2 = [-np.inf, np.inf, np.nan, 10.0, -10.0, 3.0, 3.0] truth_a = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0] truth_b = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0] @@ -185,7 +185,7 @@ def test_only_parameters_of_last_fit_count(self): pdt.assert_frame_equal(selected_X, true_X) def test_only_subset_of_columns_given(self): - data = [np.NINF, np.PINF, np.nan, 100.0, -100.0, 1.0, 1.0] + data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0] truth_a = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0] truth_b = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0] X = pd.DataFrame({"a": data, "b": data}) diff --git a/tests/units/utilities/test_dataframe_functions.py b/tests/units/utilities/test_dataframe_functions.py index b7cc5b44..e6bb6d66 100644 --- a/tests/units/utilities/test_dataframe_functions.py +++ b/tests/units/utilities/test_dataframe_functions.py @@ -21,7 +21,7 @@ def test_with_wrong_input(self): "id": [0, 0], "kind": ["a", "b"], "value": [3, 3], - "sort": [np.NaN, np.NaN], + "sort": [np.nan, np.nan], } ) self.assertRaises( @@ -133,7 +133,7 @@ def test_with_wrong_input(self): ) def test_assert_single_row(self): - test_df = pd.DataFrame([{"id": np.NaN, "kind": "a", "value": 3, "sort": 1}]) + test_df = pd.DataFrame([{"id": np.nan, "kind": "a", "value": 3, "sort": 1}]) self.assertRaises( ValueError, dataframe_functions.roll_time_series, @@ -1044,7 +1044,7 @@ def test_all_columns(self): # should not raise an exception dataframe_functions.check_for_nans_in_columns(test_df) - test_df = pd.DataFrame([[1, 2, 3], [4, np.NaN, 6]], index=[0, 1]) + test_df = pd.DataFrame([[1, 2, 3], [4, np.nan, 6]], index=[0, 1]) self.assertRaises( ValueError, dataframe_functions.check_for_nans_in_columns, test_df @@ -1052,7 +1052,7 @@ def test_all_columns(self): def test_not_all_columns(self): test_df = pd.DataFrame( - [[1, 2, 3], [4, np.NaN, 6]], index=[0, 1], columns=["a", "b", "c"] + [[1, 2, 3], [4, np.nan, 6]], index=[0, 1], columns=["a", "b", "c"] ) self.assertRaises( @@ -1083,33 +1083,33 @@ def test_not_all_columns(self): class ImputeTestCase(TestCase): def test_impute_zero(self): - df = pd.DataFrame([{"value": np.NaN}]) + df = pd.DataFrame([{"value": np.nan}]) dataframe_functions.impute_dataframe_zero(df) self.assertEqual(list(df.value), [0]) - df = pd.DataFrame([{"value": np.PINF}]) + df = pd.DataFrame([{"value": np.inf}]) dataframe_functions.impute_dataframe_zero(df) self.assertEqual(list(df.value), [0]) - df = pd.DataFrame([{"value": np.NINF}]) + df = pd.DataFrame([{"value": -np.inf}]) dataframe_functions.impute_dataframe_zero(df) self.assertEqual(list(df.value), [0]) df = pd.DataFrame( - [{"value": np.NINF}, {"value": np.NaN}, {"value": np.PINF}, {"value": 1}] + [{"value": -np.inf}, {"value": np.nan}, {"value": np.inf}, {"value": 1}] ) dataframe_functions.impute_dataframe_zero(df) self.assertEqual(list(df.value), [0, 0, 0, 1]) df = pd.DataFrame( - [{"value": np.NINF}, {"value": np.NaN}, {"value": np.PINF}, {"value": 1}] + [{"value": -np.inf}, {"value": np.nan}, {"value": np.inf}, {"value": 1}] ) df = df.astype(np.float64) df = dataframe_functions.impute_dataframe_zero(df) self.assertEqual(list(df.value), [0, 0, 0, 1]) df = pd.DataFrame( - [{"value": np.NINF}, {"value": np.NaN}, {"value": np.PINF}, {"value": 1}] + [{"value": -np.inf}, {"value": np.nan}, {"value": np.inf}, {"value": 1}] ) df = df.astype(np.float32) df = dataframe_functions.impute_dataframe_zero(df) @@ -1122,7 +1122,7 @@ def test_impute_zero(self): def test_toplevel_impute(self): df = pd.DataFrame( - np.transpose([[0, 1, 2, np.NaN], [1, np.PINF, 2, 3], [1, -3, np.NINF, 3]]), + np.transpose([[0, 1, 2, np.nan], [1, np.inf, 2, 3], [1, -3, -np.inf, 3]]), columns=["value_a", "value_b", "value_c"], ) @@ -1134,7 +1134,7 @@ def test_toplevel_impute(self): df = pd.DataFrame( np.transpose( - [[0, 1, 2, np.NaN], [1, np.PINF, 2, np.NaN], [np.NaN, -3, np.NINF, 3]] + [[0, 1, 2, np.nan], [1, np.inf, 2, np.nan], [np.nan, -3, -np.inf, 3]] ), columns=["value_a", "value_b", "value_c"], ) @@ -1147,7 +1147,7 @@ def test_toplevel_impute(self): df = pd.DataFrame( np.transpose( - [[0, 1, 2, np.NaN], [1, np.PINF, 2, 3], [np.PINF, -3, np.NINF, 3]] + [[0, 1, 2, np.nan], [1, np.inf, 2, 3], [np.inf, -3, -np.inf, 3]] ), columns=["value_a", "value_b", "value_c"], ) @@ -1167,7 +1167,7 @@ def test_impute_range(self): def get_df(): return pd.DataFrame( np.transpose( - [[0, 1, 2, np.NaN], [1, np.PINF, 2, 3], [1, -3, np.NINF, 3]] + [[0, 1, 2, np.nan], [1, np.inf, 2, 3], [1, -3, -np.inf, 3]] ), columns=["value_a", "value_b", "value_c"], ) @@ -1208,7 +1208,7 @@ def get_df(): # check for error if replacement value is not finite df = get_df() - col_to_max = {"value_a": 200, "value_b": np.NaN, "value_c": 200} + col_to_max = {"value_a": 200, "value_b": np.nan, "value_c": 200} col_to_min = {"value_a": -134, "value_b": -134, "value_c": -134} col_to_median = {"value_a": 55, "value_b": 55, "value_c": 55} self.assertRaises( @@ -1221,7 +1221,7 @@ def get_df(): ) df = get_df() col_to_max = {"value_a": 200, "value_b": 200, "value_c": 200} - col_to_min = {"value_a": -134, "value_b": np.NINF, "value_c": -134} + col_to_min = {"value_a": -134, "value_b": -np.inf, "value_c": -134} col_to_median = {"value_a": 55, "value_b": 55, "value_c": 55} self.assertRaises( ValueError, @@ -1235,7 +1235,7 @@ def get_df(): df = get_df() col_to_max = {"value_a": 200, "value_b": 200, "value_c": 200} col_to_min = {"value_a": -134, "value_b": -134, "value_c": -134} - col_to_median = {"value_a": 55, "value_b": 55, "value_c": np.PINF} + col_to_median = {"value_a": 55, "value_b": 55, "value_c": np.inf} self.assertRaises( ValueError, dataframe_functions.impute_dataframe_range, @@ -1302,7 +1302,7 @@ def test_restrict_wrong(self): class GetRangeValuesPerColumnTestCase(TestCase): def test_ignores_non_finite_values(self): - df = pd.DataFrame([0, 1, 2, 3, np.NaN, np.PINF, np.NINF], columns=["value"]) + df = pd.DataFrame([0, 1, 2, 3, np.nan, np.inf, -np.inf], columns=["value"]) ( col_to_max, @@ -1341,7 +1341,7 @@ def test_range_values_correct_with_uneven_length(self): self.assertEqual(col_to_median, {"value": 1}) def test_no_finite_values_yields_0(self): - df = pd.DataFrame([np.NaN, np.PINF, np.NINF], columns=["value"]) + df = pd.DataFrame([np.nan, np.inf, -np.inf], columns=["value"]) with warnings.catch_warnings(record=True) as w: ( diff --git a/tsfresh/feature_extraction/feature_calculators.py b/tsfresh/feature_extraction/feature_calculators.py index 71d4beb1..38f2bbf9 100644 --- a/tsfresh/feature_extraction/feature_calculators.py +++ b/tsfresh/feature_extraction/feature_calculators.py @@ -156,7 +156,7 @@ def _estimate_friedrich_coefficients(x, m, r): try: df["quantiles"] = pd.qcut(df.signal, r) except (ValueError, IndexError): - return [np.NaN] * (m + 1) + return [np.nan] * (m + 1) quantiles = df.groupby("quantiles") @@ -168,7 +168,7 @@ def _estimate_friedrich_coefficients(x, m, r): try: return np.polyfit(result.x_mean, result.y_mean, deg=m) except (np.linalg.LinAlgError, ValueError): - return [np.NaN] * (m + 1) + return [np.nan] * (m + 1) def _aggregate_on_chunks(x, f_agg, chunk_len): @@ -518,11 +518,11 @@ def compute_adf(autolag): try: return adfuller(x, autolag=autolag) except LinAlgError: - return np.NaN, np.NaN, np.NaN + return np.nan, np.nan, np.nan except ValueError: # occurs if sample size is too small - return np.NaN, np.NaN, np.NaN + return np.nan, np.nan, np.nan except MissingDataError: # is thrown for e.g. inf or nan in the data - return np.NaN, np.NaN, np.NaN + return np.nan, np.nan, np.nan res = [] for config in param: @@ -538,7 +538,7 @@ def compute_adf(autolag): elif config["attr"] == "usedlag": res.append((index, adf[2])) else: - res.append((index, np.NaN)) + res.append((index, np.nan)) return res @@ -635,7 +635,7 @@ def mean_change(x): :return type: float """ x = np.asarray(x) - return (x[-1] - x[0]) / (len(x) - 1) if len(x) > 1 else np.NaN + return (x[-1] - x[0]) / (len(x) - 1) if len(x) > 1 else np.nan @set_property("fctype", "simple") @@ -653,7 +653,7 @@ def mean_second_derivative_central(x): :return type: float """ x = np.asarray(x) - return (x[-1] - x[-2] - x[1] + x[0]) / (2 * (len(x) - 2)) if len(x) > 2 else np.NaN + return (x[-1] - x[-2] - x[1] + x[0]) / (2 * (len(x) - 2)) if len(x) > 2 else np.nan @set_property("fctype", "simple") @@ -788,7 +788,7 @@ def root_mean_square(x): :return: the value of this feature :return type: float """ - return np.sqrt(np.mean(np.square(x))) if len(x) > 0 else np.NaN + return np.sqrt(np.mean(np.square(x))) if len(x) > 0 else np.nan @set_property("fctype", "simple") @@ -878,7 +878,7 @@ def last_location_of_maximum(x): :return type: float """ x = np.asarray(x) - return 1.0 - np.argmax(x[::-1]) / len(x) if len(x) > 0 else np.NaN + return 1.0 - np.argmax(x[::-1]) / len(x) if len(x) > 0 else np.nan @set_property("fctype", "simple") @@ -894,7 +894,7 @@ def first_location_of_maximum(x): """ if not isinstance(x, (np.ndarray, pd.Series)): x = np.asarray(x) - return np.argmax(x) / len(x) if len(x) > 0 else np.NaN + return np.argmax(x) / len(x) if len(x) > 0 else np.nan @set_property("fctype", "simple") @@ -909,7 +909,7 @@ def last_location_of_minimum(x): :return type: float """ x = np.asarray(x) - return 1.0 - np.argmin(x[::-1]) / len(x) if len(x) > 0 else np.NaN + return 1.0 - np.argmin(x[::-1]) / len(x) if len(x) > 0 else np.nan @set_property("fctype", "simple") @@ -925,7 +925,7 @@ def first_location_of_minimum(x): """ if not isinstance(x, (np.ndarray, pd.Series)): x = np.asarray(x) - return np.argmin(x) / len(x) if len(x) > 0 else np.NaN + return np.argmin(x) / len(x) if len(x) > 0 else np.nan @set_property("fctype", "simple") @@ -1109,7 +1109,7 @@ def complex_agg(x, agg): res = [ complex_agg(fft[config["coeff"]], config["attr"]) if config["coeff"] < len(fft) - else np.NaN + else np.nan for config in param ] index = [ @@ -1291,7 +1291,7 @@ def index_mass_quantile(x, param): if s == 0: # all values in x are zero or it has length 0 - return [("q_{}".format(config["q"]), np.NaN) for config in param] + return [("q_{}".format(config["q"]), np.nan) for config in param] else: # at least one value is not zero mass_centralized = np.cumsum(abs_x) / s @@ -1393,7 +1393,7 @@ def cwt_coefficients(x, param): i = widths.index(w) if calculated_cwt_for_widths.shape[1] <= coeff: - res += [np.NaN] + res += [np.nan] else: res += [calculated_cwt_for_widths[i, coeff]] @@ -1433,7 +1433,7 @@ def spkt_welch_density(x, param): # Fill up the rest of the requested coefficients with np.NaNs return zip( indices, - list(pxx[reduced_coeff]) + [np.NaN] * len(not_calculated_coefficients), + list(pxx[reduced_coeff]) + [np.nan] * len(not_calculated_coefficients), ) else: return zip(indices, pxx[coeff]) @@ -1477,7 +1477,7 @@ def ar_coefficient(x, param): calculated_AR = AutoReg(x_as_list, lags=k, trend="c") calculated_ar_params[k] = calculated_AR.fit().params except (ZeroDivisionError, LinAlgError, ValueError): - calculated_ar_params[k] = [np.NaN] * k + calculated_ar_params[k] = [np.nan] * k mod = calculated_ar_params[k] if p <= k: @@ -1486,7 +1486,7 @@ def ar_coefficient(x, param): except IndexError: res[column_name] = 0 else: - res[column_name] = np.NaN + res[column_name] = np.nan return [(key, value) for key, value in res.items()] @@ -1521,7 +1521,7 @@ def change_quantiles(x, ql, qh, isabs, f_agg): if isabs: div = np.abs(div) # All values that originate from the corridor between the quantiles ql and qh will have the category 0, - # other will be np.NaN + # other will be np.nan try: bin_cat = pd.qcut(x, [ql, qh], labels=False) bin_cat_0 = bin_cat == 0 @@ -1644,7 +1644,7 @@ def mean_n_absolute_max(x, number_of_maxima): n_absolute_maximum_values = np.sort(np.absolute(x))[-number_of_maxima:] - return np.mean(n_absolute_maximum_values) if len(x) > number_of_maxima else np.NaN + return np.mean(n_absolute_maximum_values) if len(x) > number_of_maxima else np.nan @set_property("fctype", "simple") @@ -1937,7 +1937,7 @@ def autocorrelation(x, lag): # Return the normalized unbiased covariance v = np.var(x) if np.isclose(v, 0): - return np.NaN + return np.nan else: return sum_product / ((len(x) - lag) * v) @@ -1955,7 +1955,7 @@ def quantile(x, q): :return type: float """ if len(x) == 0: - return np.NaN + return np.nan return np.quantile(x, q) @@ -2006,7 +2006,7 @@ def absolute_maximum(x): :return: the value of this feature :return type: float """ - return np.max(np.absolute(x)) if len(x) > 0 else np.NaN + return np.max(np.absolute(x)) if len(x) > 0 else np.nan @set_property("fctype", "simple") @@ -2110,7 +2110,7 @@ def friedrich_coefficients(x, param): try: res["coeff_{}__m_{}__r_{}".format(coeff, m, r)] = calculated[m][r][coeff] except IndexError: - res["coeff_{}__m_{}__r_{}".format(coeff, m, r)] = np.NaN + res["coeff_{}__m_{}__r_{}".format(coeff, m, r)] = np.nan return [(key, value) for key, value in res.items()] @@ -2186,7 +2186,7 @@ def agg_linear_trend(x, param): if f_agg not in calculated_agg or chunk_len not in calculated_agg[f_agg]: if chunk_len >= len(x): - calculated_agg[f_agg][chunk_len] = np.NaN + calculated_agg[f_agg][chunk_len] = np.nan else: aggregate_result = _aggregate_on_chunks(x, f_agg, chunk_len) lin_reg_result = linregress( @@ -2197,7 +2197,7 @@ def agg_linear_trend(x, param): attr = parameter_combination["attr"] if chunk_len >= len(x): - res_data.append(np.NaN) + res_data.append(np.nan) else: res_data.append(getattr(calculated_agg[f_agg][chunk_len], attr)) @@ -2241,7 +2241,7 @@ def energy_ratio_by_chunks(x, param): assert num_segments > 0 if full_series_energy == 0: - res_data.append(np.NaN) + res_data.append(np.nan) else: res_data.append( np.sum(np.array_split(x, num_segments)[segment_focus] ** 2.0) diff --git a/tsfresh/feature_selection/relevance.py b/tsfresh/feature_selection/relevance.py index dc06ec57..db47d3a5 100644 --- a/tsfresh/feature_selection/relevance.py +++ b/tsfresh/feature_selection/relevance.py @@ -214,7 +214,7 @@ def calculate_relevance_table( table_binary = relevance_table[relevance_table.type == "binary"].copy() table_const = relevance_table[relevance_table.type == "constant"].copy() - table_const["p_value"] = np.NaN + table_const["p_value"] = np.nan table_const["relevant"] = False if not table_const.empty: diff --git a/tsfresh/utilities/dataframe_functions.py b/tsfresh/utilities/dataframe_functions.py index ca6e128d..46dab7ca 100644 --- a/tsfresh/utilities/dataframe_functions.py +++ b/tsfresh/utilities/dataframe_functions.py @@ -91,7 +91,7 @@ def impute_dataframe_zero(df_impute): if len(df_impute) == 0: return df_impute - df_impute.replace([np.PINF, np.NINF], 0, inplace=True) + df_impute.replace([np.inf, -np.inf], 0, inplace=True) df_impute.fillna(0, inplace=True) # Ensure a type of "np.float64" @@ -165,8 +165,8 @@ def impute_dataframe_range(df_impute, col_to_max, col_to_min, col_to_median): [col_to_median] * len(df_impute), index=df_impute.index ) - df_impute.where(df_impute.values != np.PINF, other=col_to_max, inplace=True) - df_impute.where(df_impute.values != np.NINF, other=col_to_min, inplace=True) + df_impute.where(df_impute.values != np.inf, other=col_to_max, inplace=True) + df_impute.where(df_impute.values != -np.inf, other=col_to_min, inplace=True) df_impute.where(~np.isnan(df_impute.values), other=col_to_median, inplace=True) df_impute.astype(np.float64, copy=False) diff --git a/tsfresh/utilities/string_manipulation.py b/tsfresh/utilities/string_manipulation.py index 107a764a..5b0efe34 100644 --- a/tsfresh/utilities/string_manipulation.py +++ b/tsfresh/utilities/string_manipulation.py @@ -33,11 +33,11 @@ def get_config_from_string(parts): for key, value in zip(config_kwargs, config_values): if value.lower() == "nan": - dict_if_configs[key] = np.NaN + dict_if_configs[key] = np.nan elif value.lower() == "-inf": - dict_if_configs[key] = np.NINF + dict_if_configs[key] = -np.inf elif value.lower() == "inf": - dict_if_configs[key] = np.PINF + dict_if_configs[key] = np.inf else: dict_if_configs[key] = ast.literal_eval(value)