Skip to content

Commit

Permalink
fix filtered formatting
Browse files: browse the repository at this point in the history
  • Loading branch information
alex-hh committed Oct 8, 2024
1 parent 3b65d99 commit 7119cd2
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/datasets/iterable_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1276,9 +1276,13 @@ def _iter(self):
function_args.append([current_idx + i for i in range(batch_len)])
mask = self.function(*function_args, **self.fn_kwargs)
# yield one example at a time from the batch
example_keys = combined_key.split("_")
examples = _batch_to_examples(batch)
for key, example, to_keep in zip(example_keys, examples, mask):
# TODO: nicer way to handle keys?
if not self.formatting:
keys = combined_key.split("_")
else:
keys = [combined_key] * len(mask)
for key, example, to_keep in zip(keys, examples, mask):
current_idx += 1
if self._state_dict:
self._state_dict["num_examples_since_previous_state"] += 1
Expand Down

0 comments on commit 7119cd2

Please sign in to comment.