Add fixed-width signed/unsigned integer and float16 type strings to the
type-string -> pyarrow type map, and use "int8" in the Parquet
string-as-datetime test (ML-4162).

NOTE(review): line breaks and indentation in this patch were restored from a
whitespace-collapsed copy; verify against the original commit before applying.

diff --git a/storey/targets.py b/storey/targets.py
index 02dc04ed..b1f00ac4 100644
--- a/storey/targets.py
+++ b/storey/targets.py
@@ -140,8 +140,16 @@ def unzip_cols(columns):
 
 _type_string_to_pyarrow_type = {
     "str": pyarrow.string(),
+    "int8": pyarrow.int8(),
+    "int16": pyarrow.int16(),
     "int32": pyarrow.int32(),
+    "int64": pyarrow.int64(),
+    "uint8": pyarrow.uint8(),
+    "uint16": pyarrow.uint16(),
+    "uint32": pyarrow.uint32(),
+    "uint64": pyarrow.uint64(),
     "int": pyarrow.int64(),
+    "float16": pyarrow.float16(),
     "float32": pyarrow.float32(),
     "float": pyarrow.float64(),
     "bool": pyarrow.bool_(),
diff --git a/tests/test_flow.py b/tests/test_flow.py
index 06d28875..c402dae3 100644
--- a/tests/test_flow.py
+++ b/tests/test_flow.py
@@ -2270,7 +2270,7 @@ def test_write_to_parquet_string_as_datetime(tmpdir):
     out_dir = f"{tmpdir}/test_write_to_parquet_string_to_datetime/{uuid.uuid4().hex}/"
     columns = ["my_int", "my_string", "my_datetime"]
     columns_with_type = [
-        ("my_int", "int"),
+        ("my_int", "int8"),  # ML-4162
         ("my_string", "str"),
         ("my_datetime", "datetime"),
     ]
@@ -2289,6 +2289,7 @@ def test_write_to_parquet_string_as_datetime(tmpdir):
         controller.emit([i, f"this is {i}", my_time.isoformat()])
         expected.append([i, f"this is {i}", my_time.isoformat(sep=" ")])
     expected_df = pd.DataFrame(expected, columns=columns)
+    expected_df["my_int"] = expected_df["my_int"].astype("int8")
     expected_df["my_datetime"] = expected_df["my_datetime"].astype("datetime64[us]")
     controller.terminate()
     controller.await_termination()