Skip to content

Commit

Permalink
Cast features in stats (huggingface#3086)
Browse files (browse the repository at this point in the history)
* cast features in stats

* style

* keep split_extension_features
  • Loading branch information
lhoestq authored Oct 18, 2024
1 parent 7190a71 commit 8f3eb16
Showing 1 changed file with 6 additions and 5 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -264,12 +264,13 @@ def _column_from_feature(
             column_stats = column.compute_and_prepare_response(local_parquet_split_directory)
         else:
             try:
-                if split_extension_features:
-                    data = pl.DataFrame._from_arrow(
-                        pq.read_table(local_parquet_split_directory, columns=[column.name])
+                data = pl.DataFrame._from_arrow(
+                    pq.read_table(
+                        local_parquet_split_directory,
+                        columns=[column.name],
+                        schema=Features.from_dict({column.name: features[column.name]}).arrow_schema,
                     )
-                else:
-                    data = pl.read_parquet(local_parquet_split_directory / "*.parquet", columns=[column.name])
+                )
             except Exception as error:
                 raise PolarsParquetReadError(
                     f"Error reading parquet file(s) at {local_parquet_split_directory=}, columns=[{column.name}]: {error}",
Expand Down

0 comments on commit 8f3eb16

Please sign in to comment.