Skip to content

Commit

Permalink
PERF-modin-project#7068: Provide shape_hint="column" for some more operations with Series (modin-project#7069)
Browse files Browse the repository at this point in the history

Signed-off-by: Iaroslav Igoshev <iaroslav.igoshev@intel.com>
  • Loading branch information
YarShev authored Mar 15, 2024
1 parent cbb3b5d commit 21db3be
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 22 deletions.
4 changes: 1 addition & 3 deletions modin/core/dataframe/algebra/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ def register(cls, function, *call_args, **call_kwds):

def caller(query_compiler, *args, **kwargs):
"""Execute Map function against passed query compiler."""
shape_hint = call_kwds.pop("shape_hint", None) or kwargs.pop(
"shape_hint", None
)
shape_hint = call_kwds.pop("shape_hint", None) or query_compiler._shape_hint
return query_compiler.__constructor__(
query_compiler._modin_frame.map(
lambda x: function(x, *args, **kwargs), *call_args, **call_kwds
Expand Down
3 changes: 1 addition & 2 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1567,11 +1567,10 @@ def isin(self, values, ignore_indices=False, **kwargs): # noqa: PR02
Boolean mask for self of whether an element at the corresponding
position is contained in `values`.
"""
shape_hint = kwargs.pop("shape_hint", None)
if isinstance(values, type(self)) and ignore_indices:
# Pandas logic is that it ignores indexing if 'values' is a 1D object
values = values.to_pandas().squeeze(axis=1)
if shape_hint == "column":
if self._shape_hint == "column":
return SeriesDefault.register(pandas.Series.isin)(self, values, **kwargs)
else:
return DataFrameDefault.register(pandas.DataFrame.isin)(
Expand Down
8 changes: 6 additions & 2 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,7 +1702,8 @@ def stack(self, level, dropna):

# Map partitions operations
# These operations are operations that apply a function to every partition.
def isin(self, values, ignore_indices=False, shape_hint=None):
def isin(self, values, ignore_indices=False):
shape_hint = self._shape_hint
if isinstance(values, type(self)):
# HACK: if we don't cast to pandas, then the execution engine will try to
# propagate the distributed Series to workers and most likely would have
Expand Down Expand Up @@ -1975,7 +1976,10 @@ def astype(self, col_dtypes, errors: str = "raise"):
# other query compilers may not take care of error handling at the API
# layer. This query compiler assumes there won't be any errors due to
# invalid type keys.
return self.__constructor__(self._modin_frame.astype(col_dtypes, errors=errors))
return self.__constructor__(
self._modin_frame.astype(col_dtypes, errors=errors),
shape_hint=self._shape_hint,
)

def infer_objects(self):
return self.__constructor__(self._modin_frame.infer_objects())
Expand Down
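2 changes: 1 addition & 1 deletion (file path missing from this capture; presumably another storage-format query_compiler.py, to be confirmed against the commit)
Original file line number Diff line number Diff line change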
Expand Up @@ -814,7 +814,7 @@ def astype(self, col_dtypes, errors: str = "raise"):
)
return self.__constructor__(
self._modin_frame.astype(col_dtypes),
self._shape_hint,
shape_hint=self._shape_hint,
)

def setitem(self, axis, key, value):
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1919,7 +1919,7 @@ def convert_dtypes(
)
)

def isin(self, values, **kwargs): # noqa: PR01, RT01, D200
def isin(self, values): # noqa: PR01, RT01, D200
"""
Whether elements in `BasePandasDataset` are contained in `values`.
"""
Expand All @@ -1929,7 +1929,7 @@ def isin(self, values, **kwargs): # noqa: PR01, RT01, D200
values = getattr(values, "_query_compiler", values)
return self.__constructor__(
query_compiler=self._query_compiler.isin(
values=values, ignore_indices=ignore_indices, **kwargs
values=values, ignore_indices=ignore_indices
)
)

Expand Down
6 changes: 0 additions & 6 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,12 +1107,6 @@ def insert(

self._update_inplace(new_query_compiler=new_query_compiler)

def isin(self, values): # noqa: PR01, RT01, D200
"""
Whether elements in `DataFrame` are contained in `values`.
"""
return super(DataFrame, self).isin(values)

def isna(self):
"""
Detect missing values.
Expand Down
6 changes: 0 additions & 6 deletions modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1180,12 +1180,6 @@ def info(
show_counts=show_counts,
)

def isin(self, values): # noqa: PR01, RT01, D200
"""
Whether elements in `Series` are contained in `values`.
"""
return super(Series, self).isin(values, shape_hint="column")

def isna(self):
"""
Detect missing values.
Expand Down

0 comments on commit 21db3be

Please sign in to comment.