Skip to content

Commit

Permalink
FEAT-modin-project#6808: Implement '__arrow_array__' for Series (modi…
Browse files Browse the repository at this point in the history
…n-project#7200)

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored Apr 18, 2024
1 parent 177c912 commit e9dbcc1
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 0 deletions.
8 changes: 8 additions & 0 deletions modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,14 @@ def values(self): # noqa: RT01, D200
data = pd.Categorical(data, dtype=self.dtype)
return data

def __arrow_array__(self, type=None):  # noqa: GL08
    # pandas.Series has no ``__arrow_array__`` of its own (as of 2.2.*), yet
    # pyarrow supports this method on the objects it receives. Providing it
    # here lets pyarrow consume a modin.pandas.Series: the data is converted
    # to a pandas object first and then handed to ``pyarrow.array``.
    import pyarrow as pa

    pandas_series = self._to_pandas()
    return pa.array(pandas_series, type=type)

def add(
self, other, level=None, fill_value=None, axis=0
) -> Series: # noqa: PR01, RT01, D200
Expand Down
24 changes: 24 additions & 0 deletions modin/tests/pandas/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,30 @@ def test_constructor_arrow_extension_array():
df_equals(md_ser.dtypes, pd_ser.dtypes)


def test_pyarrow_array_retrieve():
    # Apply ``pyarrow.array`` to both the Modin and the pandas series through
    # ``eval_general``; the test is skipped when pyarrow is not installed.
    pa = pytest.importorskip("pyarrow")
    series_pair = create_test_series([1, 2, None], dtype="uint8[pyarrow]")
    modin_series, pandas_series = series_pair

    def to_arrow(ser):
        return pa.array(ser)

    eval_general(modin_series, pandas_series, to_arrow)


def test___arrow_array__():
    # Regression test for https://github.com/modin-project/modin/issues/6808:
    # building a pyarrow Table from a concatenated frame must give the same
    # table as building it from the frame's ``_to_pandas()`` conversion.
    pa = pytest.importorskip("pyarrow")
    first_frame = pd.DataFrame({"a": ["1", "2", "3"], "b": ["4", "5", "6"]})
    second_frame = pd.DataFrame({"a": ["7", "8", "9"], "b": ["10", "11", "12"]})
    combined = pd.concat([first_frame, second_frame])

    table_from_modin = pa.Table.from_pandas(df=combined)
    table_from_pandas = pa.Table.from_pandas(df=combined._to_pandas())
    assert table_from_modin.equals(table_from_pandas)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_copy(data):
modin_series, pandas_series = create_test_series(data)
Expand Down

0 comments on commit e9dbcc1

Please sign in to comment.