Skip to content

Commit

Permalink
Update global seabirds.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 550602484
  • Loading branch information
Bart van Merriënboer authored and copybara-github committed Jul 24, 2023
1 parent 166c672 commit 000183e
Show file tree
Hide file tree
Showing 73 changed files with 519,234 additions and 87,579 deletions.
4 changes: 2 additions & 2 deletions chirp/data/soundevents/soundevents.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,11 @@ class Soundevents(tfds.core.GeneratorBasedBuilder):
}

def _info(self) -> tfds.core.DatasetInfo:
db = namespace_db.NamespaceDatabase.load_csvs()
db = namespace_db.load_db()
dataset_class_list = db.class_lists[self.builder_config.class_list_name]
logging.info(
'Currently considering a total of %s soundevent.',
dataset_class_list.size,
len(dataset_class_list.classes),
)

full_length = self.builder_config.localization_fn is None
Expand Down
4 changes: 2 additions & 2 deletions chirp/data/soundscapes/dataset_fns.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,9 @@ def load_powdermill_annotations(annotations_path: epath.Path) -> pd.DataFrame:
filter_fn = lambda row: False

# Convert dataset labels to ebird2021.
db = namespace_db.NamespaceDatabase.load_csvs()
db = namespace_db.load_db()
ebird_mapping = db.mappings['ibp2019_to_ebird2021']
ebird_mapping_dict = ebird_mapping.to_dict()
ebird_mapping_dict = ebird_mapping.mapped_pairs
class_fn = lambda row: [ # pylint: disable=g-long-lambda
ebird_mapping_dict.get(row['Species'].strip(), row['Species'].strip())
]
Expand Down
3 changes: 2 additions & 1 deletion chirp/data/soundscapes/soundscapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,8 @@ def _info(self) -> tfds.core.DatasetInfo:
self.builder_config.keep_unknown_annotation,
)
logging.info(
'Currently considering a total of %s species.', dataset_class_list.size
'Currently considering a total of %s species.',
len(dataset_class_list.classes),
)
full_length = self.builder_config.localization_fn is None
if full_length:
Expand Down
7 changes: 3 additions & 4 deletions chirp/data/soundscapes/soundscapes_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def load_class_list(
Returns:
The desired ClassList.
"""
db = namespace_db.NamespaceDatabase.load_csvs()
db = namespace_db.load_db()
dataset_class_list = db.class_lists[class_list_name]

if (
Expand All @@ -79,9 +79,8 @@ def load_class_list(
):
# Create a new class list which includes the 'unknown' class.
dataset_class_list = namespace.ClassList(
dataset_class_list.name + '_' + UNKNOWN_LABEL,
dataset_class_list.namespace,
[UNKNOWN_LABEL] + list(dataset_class_list.classes),
(UNKNOWN_LABEL,) + dataset_class_list.classes,
)
return dataset_class_list

Expand Down Expand Up @@ -419,7 +418,7 @@ def _start_end_key(seg):
continue
# found an overlap!
for label in seg['label']:
if label in class_list:
if label in class_list.classes:
interval_labels.add(label)
else:
logging.info('dropping label not in class list: %s', str(label))
Expand Down
4 changes: 2 additions & 2 deletions chirp/eval/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _load_learned_representations(self):
"""Loads classifier output weights from the separator."""
label_csv_path = epath.Path(self.model_path) / 'label.csv'
with label_csv_path.open('r') as f:
class_list = namespace.ClassList.from_csv('label', f)
class_list = namespace.ClassList.from_csv(f)
# Load the output layer weights.
variables_path = (
epath.Path(self.model_path) / 'savedmodel/variables/variables'
Expand All @@ -187,7 +187,7 @@ def _load_learned_representations(self):
if (
len(v_shape) == 3
and v_shape[0] == 1
and v_shape[-1] == class_list.size
and v_shape[-1] == len(class_list.classes)
):
candidates.append(v)
if not candidates:
Expand Down
3 changes: 3 additions & 0 deletions chirp/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ def export_converted_model(
if class_lists is not None:
for key, class_list in class_lists.items():
with tf.io.gfile.GFile(os.path.join(workdir, f'{key}.csv'), 'w') as f:
# NOTE: Although the namespace is written to the file, there is no
# guarantee that the class list will still be compatible with the
# namespace if the latter gets updated.
f.write(class_list.to_csv())

if not export_tf_lite:
Expand Down
18 changes: 9 additions & 9 deletions chirp/inference/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,9 @@ def __post_init__(self):
model_path = base_path / 'savedmodel'
label_csv_path = base_path / 'label.csv'

self.model = tf.saved_model.load(model_path.as_posix())
self.model = tf.saved_model.load(model_path)
with label_csv_path.open('r') as f:
self.class_list = namespace.ClassList.from_csv('label', f)
self.class_list = namespace.ClassList.from_csv(f)

# Check whether the model support polymorphic batch shape.
sig = self.model.signatures['serving_default']
Expand Down Expand Up @@ -266,7 +266,7 @@ def embed(self, audio_array: np.ndarray) -> interface.InferenceOutputs:
)

return interface.InferenceOutputs(
all_embeddings, {self.class_list.name: all_logits}, None
all_embeddings, {'label': all_logits}, None
)

def batch_embed(
Expand All @@ -291,9 +291,7 @@ def batch_embed(
embeddings, framed_audio.shape[:2] + (embeddings.shape[-1],)
)

return interface.InferenceOutputs(
embeddings, {self.class_list.name: logits}, None
)
return interface.InferenceOutputs(embeddings, {'label': logits}, None)


@dataclasses.dataclass
Expand Down Expand Up @@ -325,7 +323,7 @@ def __post_init__(self):
self.model = tf.saved_model.load(epath.Path(self.model_path) / 'savedmodel')
label_csv_path = epath.Path(self.model_path) / 'label.csv'
with label_csv_path.open('r') as f:
self.class_list = namespace.ClassList.from_csv('label', f)
self.class_list = namespace.ClassList.from_csv(f)

def embed(self, audio_array: np.ndarray) -> interface.InferenceOutputs:
# Drop samples to allow reshaping to frame_size
Expand Down Expand Up @@ -362,7 +360,7 @@ def embed(self, audio_array: np.ndarray) -> interface.InferenceOutputs:
)
all_embeddings = np.reshape(all_embeddings, [-1, all_embeddings.shape[-1]])
return interface.InferenceOutputs(
all_embeddings, {self.class_list.name: all_logits}, sep_audio
all_embeddings, {'label': all_logits}, sep_audio
)

def batch_embed(self, audio_batch: np.ndarray) -> interface.InferenceOutputs:
Expand Down Expand Up @@ -592,7 +590,9 @@ def embed(self, audio_array: np.ndarray) -> interface.InferenceOutputs:
)
if self.make_logits:
outputs['logits'] = {
'label': np.zeros([time_size, self.class_list.size], np.float32),
'label': np.zeros(
[time_size, len(self.class_list.classes)], np.float32
),
}
outputs['logits']['label'] = self.convert_logits(
outputs['logits']['label'], self.class_list, self.target_class_list
Expand Down
14 changes: 3 additions & 11 deletions chirp/path_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,10 @@
General utilities to help with handling paths.
"""
import os
from typing import Iterable
from absl import logging
from etils import epath
import pathlib


def get_absolute_epath(relative_path: str) -> epath.Path:
def get_absolute_path(relative_path: os.PathLike[str] | str) -> pathlib.Path:
"""Returns the absolute path associated with the relative_path.
Args:
Expand All @@ -32,11 +30,5 @@ def get_absolute_epath(relative_path: str) -> epath.Path:
Returns:
The absolute path to the resource.
"""
file_path = epath.Path(__file__).parent / relative_path
file_path = pathlib.Path(__file__).parent / relative_path
return file_path


def listdir(relative_path: str) -> Iterable[str]:
"""List the contents of a directory in the Chirp project."""
absolute_path = get_absolute_epath(relative_path).as_posix()
return os.listdir(absolute_path)
11 changes: 5 additions & 6 deletions chirp/preprocessing/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ class ConvertBirdTaxonomyLabels(FeaturesPreprocessOp):
output_masks: bool = True

# The following members are for cached / stateful data.
db: namespace_db.NamespaceDatabase | None = None
db: namespace_db.TaxonomyDatabase | None = None

def __post_init__(self):
# Create the namespace database in post_init to avoid loading it repeatedly.
Expand All @@ -713,7 +713,7 @@ def __post_init__(self):
# applied multiple times on different datasets. Otherwise, in subsequent
# pipeline applications TF will attempt to re-use previous constants
# belonging to a different tf.function.
self.db = namespace_db.NamespaceDatabase.load_csvs()
self.db = namespace_db.load_db()

def load_tables(
self, source_class_list: namespace.ClassList
Expand Down Expand Up @@ -756,7 +756,7 @@ def load_tables(
target_taxa_classes = target_classes.apply_namespace_mapping(
namespace_mapping
)
namespace_table, _ = source_class_list.get_namespace_map_tf_lookup(
namespace_table = source_class_list.get_namespace_map_tf_lookup(
namespace_mapping
)
class_table, label_mask = source_taxa_classes.get_class_map_tf_lookup(
Expand Down Expand Up @@ -861,19 +861,18 @@ def __call__(
self, features: Features, dataset_info: tfds.core.DatasetInfo
) -> Features:
source_classes = namespace.ClassList(
'dataset',
self.source_namespace,
# TODO(vdumoulin): generalize this to labels beyond 'ignore'.
# Some dataset variants (e.g. bird_taxonomy/downstream_slice_peaked)
# use an 'ignore' label which is not part of the eBirds taxonomy. We
# ignore this label; the mapping tables return an 'unknown' default
# value, so all 'ignore' labels will naturally be converted to
# 'unknown'.
[
tuple(
n
for n in dataset_info.features[self.species_feature_name].names
if n != 'ignore'
],
),
)
output_features = self.convert_features(features, source_classes)
return output_features
Expand Down
2 changes: 1 addition & 1 deletion chirp/projects/sfda/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ def prepare_audio_model(
class_lists = class_utils.get_class_lists(
target_class_list, add_taxonomic_labels=False
)
num_classes = {k: v.size for (k, v) in class_lists.items()}
num_classes = {k: len(v.classes) for (k, v) in class_lists.items()}
model = taxonomy_model.TaxonomyModel(
num_classes=num_classes,
encoder=model_config.encoder,
Expand Down
Loading

0 comments on commit 000183e

Please sign in to comment.