diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index f1424fb..056be28 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -17,7 +17,7 @@ import datetime import re -from typing import Union +from typing import Optional, Union import numpy import packaging.version @@ -103,7 +103,7 @@ def _datetime( r"(?::(?P\d+)" r"(?:\.(?P\d*))?)?)?\s*$" ).match, - ): + ) -> Optional[numpy.datetime64]: # Convert pyarrow values to datetime.time. if isinstance(scalar, (pyarrow.Time32Scalar, pyarrow.Time64Scalar)): scalar = ( @@ -115,8 +115,16 @@ def _datetime( if scalar is None: return None - elif isinstance(scalar, datetime.time): - return datetime.datetime.combine(_EPOCH, scalar) + if isinstance(scalar, datetime.time): + return pandas.Timestamp( + year=1970, + month=1, + day=1, + hour=scalar.hour, + minute=scalar.minute, + second=scalar.second, + microsecond=scalar.microsecond, + ).to_datetime64() elif isinstance(scalar, pandas.Timestamp): return scalar.to_datetime64() elif isinstance(scalar, str): @@ -125,20 +133,20 @@ def _datetime( if not parsed: raise ValueError(f"Bad time string: {repr(scalar)}") - hours = parsed.group("hours") - minutes = parsed.group("minutes") - seconds = parsed.group("seconds") + hour = parsed.group("hours") + minute = parsed.group("minutes") + second = parsed.group("seconds") fraction = parsed.group("fraction") - microseconds = int(fraction.ljust(6, "0")[:6]) if fraction else 0 - return datetime.datetime( - 1970, - 1, - 1, - int(hours), - int(minutes) if minutes else 0, - int(seconds) if seconds else 0, - microseconds, - ) + nanosecond = int(fraction.ljust(9, "0")[:9]) if fraction else 0 + return pandas.Timestamp( + year=1970, + month=1, + day=1, + hour=int(hour), + minute=int(minute) if minute else 0, + second=int(second) if second else 0, + nanosecond=nanosecond, + ).to_datetime64() else: raise TypeError("Invalid value type", scalar) @@ -225,7 +233,7 @@ class DateArray(core.BaseDatetimeArray): def _datetime( scalar, match_fn=re.compile(r"\s*(?P\d+)-(?P\d+)-(?P\d+)\s*$").match, - ): + ) -> Optional[numpy.datetime64]: # Convert pyarrow values to datetime.date. if isinstance(scalar, (pyarrow.Date32Scalar, pyarrow.Date64Scalar)): scalar = scalar.as_py() @@ -233,7 +241,9 @@ def _datetime( if scalar is None: return None elif isinstance(scalar, datetime.date): - return datetime.datetime(scalar.year, scalar.month, scalar.day) + return pandas.Timestamp( + year=scalar.year, month=scalar.month, day=scalar.day + ).to_datetime64() elif isinstance(scalar, str): match = match_fn(scalar) if not match: @@ -241,7 +251,7 @@ def _datetime( year = int(match.group("year")) month = int(match.group("month")) day = int(match.group("day")) - return datetime.datetime(year, month, day) + return pandas.Timestamp(year=year, month=month, day=day).to_datetime64() else: raise TypeError("Invalid value type", scalar) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index c8f3ad4..3ade198 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -127,9 +127,7 @@ def take( if allow_fill: fill_value = self._validate_scalar(fill_value) fill_value = ( - numpy.datetime64() - if fill_value is None - else numpy.datetime64(self._datetime(fill_value)) + numpy.datetime64() if fill_value is None else self._datetime(fill_value) ) if (indices < -1).any(): raise ValueError( diff --git a/tests/unit/test_arrow.py b/tests/unit/test_arrow.py index 5f45a90..4d4fc50 100644 --- a/tests/unit/test_arrow.py +++ b/tests/unit/test_arrow.py @@ -183,13 +183,13 @@ def types_mapper( type=pyarrow.time64("us"), ), ), - ( + # Only microseconds are supported when reading data. See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/19 + # Still, round-trip with pyarrow nanosecond precision scalars + # is supported. + pytest.param( pandas.Series( [ - # Only microseconds are supported when reading data. See: - # https://github.com/googleapis/python-db-dtypes-pandas/issues/19 - # Still, round-trip with pyarrow nanosecond precision scalars - # is supported. pyarrow.scalar(0, pyarrow.time64("ns")), pyarrow.scalar( 12 * HOUR_NANOS @@ -216,6 +216,21 @@ def types_mapper( ], type=pyarrow.time64("ns"), ), + id="time-nanoseconds-arrow-round-trip", + ), + pytest.param( + pandas.Series( + ["0:0:0", "12:30:15.123456789", "23:59:59.999999999"], dtype="dbtime", + ), + pyarrow.array( + [ + 0, + 12 * HOUR_NANOS + 30 * MINUTE_NANOS + 15 * SECOND_NANOS + 123_456_789, + 23 * HOUR_NANOS + 59 * MINUTE_NANOS + 59 * SECOND_NANOS + 999_999_999, + ], + type=pyarrow.time64("ns"), + ), + id="time-nanoseconds-arrow-from-string", ), ] diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index c919f6d..b906f24 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -55,6 +55,11 @@ def test_date_parsing(value, expected): ("2021-2-99", "day is out of range for month"), ("2021-99-1", "month must be in 1[.][.]12"), ("10000-1-1", "year 10000 is out of range"), + # Outside of min/max values pandas.Timestamp. + ("0001-01-01", "Out of bounds"), + ("9999-12-31", "Out of bounds"), + ("1677-09-21", "Out of bounds"), + ("2262-04-12", "Out of bounds"), ], ) def test_date_parsing_errors(value, error):