Skip to content

Commit

Permalink
FIX-#4828: allow dict_apply_builder use keyword argument `internal_…
Browse files Browse the repository at this point in the history
…indices` (#5945)

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Apr 24, 2023
1 parent a335cdf commit f2422e9
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 12 deletions.
17 changes: 13 additions & 4 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2700,15 +2700,24 @@ def _dict_func(self, func, axis, *args, **kwargs):
if "axis" not in kwargs:
kwargs["axis"] = axis

def dict_apply_builder(df, func_dict={}): # pragma: no cover
func = {k: wrap_udf_function(v) if callable(v) else v for k, v in func.items()}

def dict_apply_builder(df, internal_indices=[]): # pragma: no cover
# Sometimes `apply` can return a `Series`, but we require that internally
# all objects are `DataFrame`s.
return pandas.DataFrame(df.apply(func_dict, *args, **kwargs))
# It looks like the `internal_indices` option does not need to be used here,
# since `apply` uses the labels from the dictionary keys in the `func` variable.
return pandas.DataFrame(df.apply(func, *args, **kwargs))

func = {k: wrap_udf_function(v) if callable(v) else v for k, v in func.items()}
labels = list(func.keys())
return self.__constructor__(
self._modin_frame.apply_full_axis_select_indices(
axis, dict_apply_builder, func, keep_remaining=False
axis,
dict_apply_builder,
labels,
new_index=labels if axis == 1 else None,
new_columns=labels if axis == 0 else None,
keep_remaining=False,
)
)

Expand Down
54 changes: 46 additions & 8 deletions modin/pandas/test/dataframe/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
arg_keys,
default_to_pandas_ignore_string,
)
from modin.config import NPartitions, StorageFormat
from modin.config import NPartitions
from modin.test.test_utils import warns_that_defaulting_to_pandas
from modin.utils import get_current_execution

Expand Down Expand Up @@ -116,13 +116,6 @@ def test_aggregate_error_checking():
modin_df.aggregate("NOT_EXISTS")


@pytest.mark.xfail(
StorageFormat.get() == "Pandas",
reason="DataFrame.apply(dict) raises an exception because of a bug in its"
+ "implementation for pandas storage format, this prevents us from catching the desired"
+ "exception. You can track this bug at:"
+ "https://github.com/modin-project/modin/issues/3221",
)
@pytest.mark.parametrize(
"func",
agg_func_values + agg_func_except_values,
Expand Down Expand Up @@ -245,6 +238,51 @@ def test_apply_udf(data, func):
)


def test_apply_dict_4828():
    """Check ``apply`` with a dict of callables (see issue #4828).

    A dict mapping label ``0`` to a squaring lambda is applied, once with
    the default axis and once with ``axis=1``, to three pairs of frames:
    a plain 2x2 frame, a frame concatenated along axis 0, and a frame
    concatenated along axis 1. Every Modin/pandas pair is handed to
    ``eval_general`` together with the operation.
    """
    data = [[2, 4], [1, 3]]

    def apply_default_axis(df):
        return df.apply({0: (lambda x: x**2)})

    def apply_axis_one(df):
        return df.apply({0: (lambda x: x**2)}, axis=1)

    def check_both_axes(modin_frame, pandas_frame):
        # Same order as before: default axis first, then axis=1.
        for operation in (apply_default_axis, apply_axis_one):
            eval_general(modin_frame, pandas_frame, operation)

    modin_base, pandas_base = create_test_dfs(data)
    check_both_axes(modin_base, pandas_base)

    # several partitions along axis 0
    modin_rows, pandas_rows = create_test_dfs(data, index=[2, 3])
    check_both_axes(
        pd.concat([modin_base, modin_rows], axis=0),
        pandas.concat([pandas_base, pandas_rows], axis=0),
    )

    # several partitions along axis 1
    modin_cols, pandas_cols = create_test_dfs(data, columns=[2, 3])
    check_both_axes(
        pd.concat([modin_base, modin_cols], axis=1),
        pandas.concat([pandas_base, pandas_cols], axis=1),
    )


def test_apply_modin_func_4635():
data = [1]
modin_df, pandas_df = create_test_dfs(data)
Expand Down

0 comments on commit f2422e9

Please sign in to comment.