Skip to content

Commit

Permalink
Gracefully handle web and cloud data sources when scanning for audio files.
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 645884486
  • Loading branch information
sdenton4 authored and copybara-github committed Jun 25, 2024
1 parent 6c77181 commit 5a374b9
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
8 changes: 7 additions & 1 deletion chirp/inference/embed_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,13 @@ def create_source_infos(
"""
source_files = []
for pattern in source_file_patterns:
for source_file in epath.Path('').glob(pattern):
if '://' in pattern:
root, pattern = pattern.split('://')
root = root + '://'
else:
root = ''

for source_file in epath.Path(root).glob(pattern):
source_files.append(source_file)

source_file_splits = []
Expand Down
6 changes: 5 additions & 1 deletion chirp/inference/tests/embed_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,11 @@ def test_frame_audio(self):

def test_create_source_infos(self):
# Just one file, but it's all good.
globs = [path_utils.get_absolute_path('inference/tests/testdata/clap.wav')]
globs = [
path_utils.get_absolute_path(
'inference/tests/testdata/clap.wav'
).as_posix()
]
# Disable sharding by setting shard_len_s <= 0.
got_infos = embed_lib.create_source_infos(
globs, shard_len_s=-1, num_shards_per_file=100
Expand Down

0 comments on commit 5a374b9

Please sign in to comment.