Skip to content

Commit

Permalink
Add details to expectations for scalars (#308)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Nov 17, 2023
1 parent 0c171a4 commit 1f81476
Show file tree
Hide file tree
Showing 9 changed files with 368 additions and 138 deletions.
114 changes: 66 additions & 48 deletions spec/API_specification/dataframe_api/column_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@
if TYPE_CHECKING:
from typing_extensions import Self

from dataframe_api.dataframe_object import DataFrame

from .typing import DType, Namespace, NullType, Scalar
from .typing import (
AnyScalar,
DataFrame,
DType,
Namespace,
NullType,
Scalar,
)


__all__ = ["Column"]
Expand Down Expand Up @@ -224,7 +229,7 @@ def sorted_indices(
"""
...

def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override]
def __eq__(self, other: Self | AnyScalar) -> Self: # type: ignore[override]
"""Compare for equality.
Nulls should follow Kleene Logic.
Expand All @@ -247,7 +252,7 @@ def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override]
"""
...

def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override]
def __ne__(self, other: Self | AnyScalar) -> Self: # type: ignore[override]
"""Compare for non-equality.
Nulls should follow Kleene Logic.
Expand All @@ -270,7 +275,7 @@ def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override]
"""
...

def __ge__(self, other: Self | Scalar) -> Self:
def __ge__(self, other: Self | AnyScalar) -> Self:
"""Compare for "greater than or equal to" `other`.
Parameters
Expand All @@ -291,7 +296,7 @@ def __ge__(self, other: Self | Scalar) -> Self:
"""
...

def __gt__(self, other: Self | Scalar) -> Self:
def __gt__(self, other: Self | AnyScalar) -> Self:
"""Compare for "greater than" `other`.
Parameters
Expand All @@ -312,7 +317,7 @@ def __gt__(self, other: Self | Scalar) -> Self:
"""
...

def __le__(self, other: Self | Scalar) -> Self:
def __le__(self, other: Self | AnyScalar) -> Self:
"""Compare for "less than or equal to" `other`.
Parameters
Expand All @@ -333,7 +338,7 @@ def __le__(self, other: Self | Scalar) -> Self:
"""
...

def __lt__(self, other: Self | Scalar) -> Self:
def __lt__(self, other: Self | AnyScalar) -> Self:
"""Compare for "less than" `other`.
Parameters
Expand All @@ -354,7 +359,7 @@ def __lt__(self, other: Self | Scalar) -> Self:
"""
...

def __and__(self, other: Self | bool) -> Self:
def __and__(self, other: Self | bool | Scalar) -> Self:
"""Apply logical 'and' to `other` Column (or scalar) and this Column.
Nulls should follow Kleene Logic.
Expand All @@ -380,7 +385,7 @@ def __and__(self, other: Self | bool) -> Self:
"""
...

def __or__(self, other: Self | bool) -> Self:
def __or__(self, other: Self | bool | Scalar) -> Self:
"""Apply logical 'or' to `other` Column (or scalar) and this column.
Nulls should follow Kleene Logic.
Expand All @@ -406,7 +411,7 @@ def __or__(self, other: Self | bool) -> Self:
"""
...

def __add__(self, other: Self | Scalar) -> Self:
def __add__(self, other: Self | AnyScalar) -> Self:
"""Add `other` column or scalar to this column.
Parameters
Expand All @@ -427,7 +432,7 @@ def __add__(self, other: Self | Scalar) -> Self:
"""
...

def __sub__(self, other: Self | Scalar) -> Self:
def __sub__(self, other: Self | AnyScalar) -> Self:
"""Subtract `other` column or scalar from this column.
Parameters
Expand All @@ -448,7 +453,7 @@ def __sub__(self, other: Self | Scalar) -> Self:
"""
...

def __mul__(self, other: Self | Scalar) -> Self:
def __mul__(self, other: Self | AnyScalar) -> Self:
"""Multiply `other` column or scalar with this column.
Parameters
Expand All @@ -469,7 +474,7 @@ def __mul__(self, other: Self | Scalar) -> Self:
"""
...

def __truediv__(self, other: Self | Scalar) -> Self:
def __truediv__(self, other: Self | AnyScalar) -> Self:
"""Divide this column by `other` column or scalar. True division, returns floats.
Parameters
Expand All @@ -490,7 +495,7 @@ def __truediv__(self, other: Self | Scalar) -> Self:
"""
...

def __floordiv__(self, other: Self | Scalar) -> Self:
def __floordiv__(self, other: Self | AnyScalar) -> Self:
"""Floor-divide this column by `other` column or scalar.
Parameters
Expand All @@ -511,7 +516,7 @@ def __floordiv__(self, other: Self | Scalar) -> Self:
"""
...

def __pow__(self, other: Self | Scalar) -> Self:
def __pow__(self, other: Self | AnyScalar) -> Self:
"""Raise this column to the power of `other`.
Integer dtype to the power of non-negative integer dtype is integer dtype.
Expand All @@ -536,7 +541,7 @@ def __pow__(self, other: Self | Scalar) -> Self:
"""
...

def __mod__(self, other: Self | Scalar) -> Self:
def __mod__(self, other: Self | AnyScalar) -> Self:
"""Return modulus of this column by `other` (`%` operator).
Parameters
Expand All @@ -557,7 +562,7 @@ def __mod__(self, other: Self | Scalar) -> Self:
"""
...

def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]:
def __divmod__(self, other: Self | AnyScalar) -> tuple[Column, Column]:
"""Return quotient and remainder of integer division. See `divmod` builtin.
Parameters
Expand All @@ -578,16 +583,16 @@ def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]:
"""
...

def __radd__(self, other: Self | Scalar) -> Self:
def __radd__(self, other: Self | AnyScalar) -> Self:
...

def __rsub__(self, other: Self | Scalar) -> Self:
def __rsub__(self, other: Self | AnyScalar) -> Self:
...

def __rmul__(self, other: Self | Scalar) -> Self:
def __rmul__(self, other: Self | AnyScalar) -> Self:
...

def __rtruediv__(self, other: Self | Scalar) -> Self:
def __rtruediv__(self, other: Self | AnyScalar) -> Self:
...

def __rand__(self, other: Self | bool) -> Self:
Expand All @@ -596,13 +601,13 @@ def __rand__(self, other: Self | bool) -> Self:
def __ror__(self, other: Self | bool) -> Self:
...

def __rfloordiv__(self, other: Self | Scalar) -> Self:
def __rfloordiv__(self, other: Self | AnyScalar) -> Self:
...

def __rpow__(self, other: Self | Scalar) -> Self:
def __rpow__(self, other: Self | AnyScalar) -> Self:
...

def __rmod__(self, other: Self | Scalar) -> Self:
def __rmod__(self, other: Self | AnyScalar) -> Self:
...

def __invert__(self) -> Self:
Expand All @@ -615,7 +620,7 @@ def __invert__(self) -> Self:
"""
...

def any(self, *, skip_nulls: bool = True) -> bool | NullType:
def any(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a bool.
Raises
Expand All @@ -625,7 +630,7 @@ def any(self, *, skip_nulls: bool = True) -> bool | NullType:
"""
...

def all(self, *, skip_nulls: bool = True) -> bool | NullType:
def all(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a bool.
Raises
Expand All @@ -635,23 +640,23 @@ def all(self, *, skip_nulls: bool = True) -> bool | NullType:
"""
...

def min(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def min(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.
Any data type that supports comparisons
must be supported. The returned value has the same dtype as the column.
"""
...

def max(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def max(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.
Any data type that supports comparisons
must be supported. The returned value has the same dtype as the column.
"""
...

def sum(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def sum(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.
Must be supported for numerical and
Expand All @@ -660,15 +665,15 @@ def sum(self, *, skip_nulls: bool = True) -> Scalar | NullType:
"""
...

def prod(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def prod(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.
Must be supported for numerical data types.
The returned value has the same dtype as the column.
"""
...

def median(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def median(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.
Must be supported for numerical and
Expand All @@ -678,7 +683,7 @@ def median(self, *, skip_nulls: bool = True) -> Scalar | NullType:
"""
...

def mean(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def mean(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.
Must be supported for numerical and
Expand All @@ -691,9 +696,9 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def std(
self,
*,
correction: int | float = 1,
skip_nulls: bool = True,
) -> Scalar | NullType:
correction: float = 1,
skip_nulls: bool | Scalar = True,
) -> Scalar:
"""Reduction returns a scalar.
Must be supported for numerical and
Expand Down Expand Up @@ -724,9 +729,9 @@ def std(
def var(
self,
*,
correction: int | float = 1,
skip_nulls: bool = True,
) -> Scalar | NullType:
correction: float | Scalar = 1,
skip_nulls: bool | Scalar = True,
) -> Scalar:
"""Reduction returns a scalar.
Must be supported for numerical and
Expand Down Expand Up @@ -835,7 +840,7 @@ def is_in(self, values: Self) -> Self:
"""
...

def unique_indices(self, *, skip_nulls: bool = True) -> Self:
def unique_indices(self, *, skip_nulls: bool | Scalar = True) -> Self:
"""Return indices corresponding to unique values in Column.
Returns
Expand All @@ -855,7 +860,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Self:
"""
...

def fill_nan(self, value: float | NullType, /) -> Self:
def fill_nan(self, value: float | NullType | Scalar, /) -> Self:
"""Fill floating point ``nan`` values with the given fill value.
Parameters
Expand All @@ -868,7 +873,7 @@ def fill_nan(self, value: float | NullType, /) -> Self:
"""
...

def fill_null(self, value: Scalar, /) -> Self:
def fill_null(self, value: AnyScalar, /) -> Self:
"""Fill null values with the given fill value.
Parameters
Expand Down Expand Up @@ -914,7 +919,7 @@ def to_array(self) -> Any:
"""
...

def rename(self, name: str) -> Self:
def rename(self, name: str | Scalar) -> Self:
"""Rename column.
Parameters
Expand All @@ -929,17 +934,17 @@ def rename(self, name: str) -> Self:
"""
...

def shift(self, offset: int) -> Self:
def shift(self, offset: int | Scalar) -> Self:
"""Shift values by `offset` positions, filling missing values with `null`.
For example, if the original column contains values `[1, 4, 2]`, then:
- `.shift(1)` will return `[null, 1, 4]`,
- `.shift(-1)` will return `[4, 2, null]`,
Parameters
----------
offset
offset : int
How many positions to shift by.
"""
...
Expand Down Expand Up @@ -1020,7 +1025,7 @@ def iso_weekday(self) -> Self:
"""
...

def unix_timestamp(self, *, time_unit: Literal["s", "ms", "us"] = "s") -> Self:
def unix_timestamp(self, *, time_unit: str | Scalar = "s") -> Self:
"""Return number of seconds / milliseconds / microseconds since the Unix epoch.
The Unix epoch is 00:00:00 UTC on 1 January 1970.
Expand All @@ -1039,3 +1044,16 @@ def unix_timestamp(self, *, time_unit: Literal["s", "ms", "us"] = "s") -> Self:
discarded.
"""
...

def persist(self) -> Self:
    """Hint that computation prior to this point should not be repeated.

    This is intended as a hint, rather than as a directive. Implementations
    which do not separate lazy vs eager execution may ignore this method and
    treat it as a no-op.

    .. note::
        This method may trigger execution. If necessary, it should be called
        at most once per dataframe, and as late as possible in the pipeline.

    Returns
    -------
    Self
    """
    ...
Loading

0 comments on commit 1f81476

Please sign in to comment.