From e9dbc160f3c59856c5f20e420330d7be5e7daf29 Mon Sep 17 00:00:00 2001 From: arunjose696 Date: Thu, 23 May 2024 15:48:39 -0500 Subject: [PATCH] All but one test passing --- .../pandas/small_query_compiler.py | 40 +++++++++++++++++-- modin/pandas/series.py | 34 +++++----------- 2 files changed, 46 insertions(+), 28 deletions(-) diff --git a/modin/experimental/core/storage_formats/pandas/small_query_compiler.py b/modin/experimental/core/storage_formats/pandas/small_query_compiler.py index d6767478926..ddba152863d 100644 --- a/modin/experimental/core/storage_formats/pandas/small_query_compiler.py +++ b/modin/experimental/core/storage_formats/pandas/small_query_compiler.py @@ -21,6 +21,7 @@ import numpy as np import pandas from pandas.core.dtypes.common import is_list_like, is_scalar +from typing import Optional from modin.config.envvars import UsePlainPandasQueryCompiler from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler @@ -587,7 +588,11 @@ def caller(query_compiler, *args, **kwargs): args = try_cast_to_pandas(args, squeeze=squeeze_args) kwargs = try_cast_to_pandas(kwargs, squeeze=squeeze_kwargs) result = func(df, *args, **kwargs) + inplace_method = kwargs.get("inplace", False) + if in_place: + inplace_method = in_place + if inplace_method: result = df if not (return_modin or isinstance(result, (pandas.Series, pandas.DataFrame))): return result @@ -595,6 +600,7 @@ def caller(query_compiler, *args, **kwargs): if result.name is None: result.name = MODIN_UNNAMED_SERIES_LABEL result = result.to_frame() + return query_compiler.__constructor__(result) @@ -692,7 +698,6 @@ def setitem_bool(self, row_loc, col_loc, item): cumprod = _register_default_pandas(pandas.DataFrame.cumprod) cumsum = _register_default_pandas(pandas.DataFrame.cumsum) delitem = _register_default_pandas(_delitem) - describe = _register_default_pandas(pandas.DataFrame.describe) df_update = _register_default_pandas( pandas.DataFrame.update, in_place=True, df_copy=True ) @@ -855,7 
+860,7 @@ def setitem_bool(self, row_loc, col_loc, item): ) isna = _register_default_pandas(pandas.DataFrame.isna) join = _register_default_pandas(pandas.DataFrame.join) - kurt = _register_default_pandas(pandas.DataFrame.kurt) + kurt = _register_default_pandas(pandas.DataFrame.kurt, return_modin=False) last_valid_index = _register_default_pandas( pandas.DataFrame.last_valid_index, return_modin=False ) @@ -866,7 +871,7 @@ def setitem_bool(self, row_loc, col_loc, item): max = _register_default_pandas(pandas.DataFrame.max) map = _register_default_pandas(pandas.DataFrame.map) mean = _register_default_pandas(pandas.DataFrame.mean, return_modin=False) - median = _register_default_pandas(pandas.DataFrame.median) + median = _register_default_pandas(pandas.DataFrame.median, return_modin=False) melt = _register_default_pandas(pandas.DataFrame.melt) memory_usage = _register_default_pandas(pandas.DataFrame.memory_usage) merge = _register_default_pandas(pandas.DataFrame.merge) @@ -964,7 +969,7 @@ def setitem_bool(self, row_loc, col_loc, item): series_view = _register_default_pandas(pandas.Series.view, is_series=True) set_index_from_columns = _register_default_pandas(pandas.DataFrame.set_index) setitem = _register_default_pandas(_setitem) - skew = _register_default_pandas(pandas.DataFrame.skew) + skew = _register_default_pandas(pandas.DataFrame.skew, return_modin=False) sort_index = _register_default_pandas(_sort_index) sort_columns_by_row_values = _register_default_pandas( lambda df, columns, **kwargs: df.sort_values(by=columns, axis=1, **kwargs) @@ -1056,6 +1061,12 @@ def clip(self, lower, upper, **kwargs): return _register_default_pandas(pandas.DataFrame.clip)( self, lower, upper, **kwargs ) + def describe(self, percentiles: np.ndarray): + return _register_default_pandas(pandas.DataFrame.describe)( + self, + percentiles=percentiles, + include="all", + ) def dot(self, other, squeeze_self=None, squeeze_other=None): other = try_cast_to_pandas(other) @@ -1223,3 +1234,24 @@ def 
getitem_column_array(self, key, numeric=False, ignore_order=False): def is_series_like(self): return len(self._modin_frame.columns) == 1 or len(self._modin_frame.index) == 1 + + def support_materialization_in_worker_process(self) -> bool: + """ + Whether it's possible to call function `to_pandas` during the pickling process, at the moment of recreating the object. + + Returns + ------- + bool + """ + return True + + def get_pandas_backend(self) -> Optional[str]: + """ + Get backend stored in `_modin_frame`. + + Returns + ------- + str | None + Backend name. + """ + return None \ No newline at end of file diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 749cf0f6a50..720a8a4dc1a 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -147,32 +147,18 @@ def __init__( name = MODIN_UNNAMED_SERIES_LABEL if isinstance(data, pandas.Series) and data.name is not None: name = data.name - if UsePlainPandasQueryCompiler.get(): - query_compiler = PlainPandasQueryCompiler( - pandas.DataFrame( - pandas.Series( - data=data, - index=index, - dtype=dtype, - name=name, - copy=copy, - fastpath=fastpath, - ) + query_compiler = from_pandas( + pandas.DataFrame( + pandas.Series( + data=data, + index=index, + dtype=dtype, + name=name, + copy=copy, + fastpath=fastpath, ) ) - else: - query_compiler = from_pandas( - pandas.DataFrame( - pandas.Series( - data=data, - index=index, - dtype=dtype, - name=name, - copy=copy, - fastpath=fastpath, - ) - ) - )._query_compiler + )._query_compiler self._query_compiler = query_compiler.columnarize() if name is not None: self.name = name