From e86059a1633e09a7fd0d787cf40a6db668149c00 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Thu, 31 Oct 2024 11:02:01 +0100 Subject: [PATCH] Update syntax to Python 3.9 (#452) * Update syntax to Python 3.9 * Increase code quality for _put_media() * Revert "Increase code quality for _put_media()" This reverts commit bbbe082fd9198a429e1aa168e69f36a2eac1cac4. --- audb/core/api.py | 12 ++-- audb/core/config.py | 2 +- audb/core/dependencies.py | 51 +++++++------- audb/core/flavor.py | 12 ++-- audb/core/info.py | 50 +++++++------- audb/core/load.py | 107 +++++++++++++++--------------- audb/core/load_to.py | 34 +++++----- audb/core/lock.py | 12 ++-- audb/core/publish.py | 46 +++++++------ audb/core/repository.py | 6 +- audb/core/stream.py | 21 +++--- audb/core/utils.py | 8 +-- benchmarks/dependencies_polars.py | 51 +++++++------- tests/test_info.py | 20 +++--- tests/test_publish.py | 2 +- tests/test_publish_table.py | 4 +- tests/test_stream.py | 7 +- 17 files changed, 229 insertions(+), 216 deletions(-) diff --git a/audb/core/api.py b/audb/core/api.py index e5f8f4cc..ee11cc0a 100644 --- a/audb/core/api.py +++ b/audb/core/api.py @@ -1,6 +1,8 @@ +from __future__ import annotations + +from collections.abc import Sequence import os import tempfile -import typing import pandas as pd @@ -286,7 +288,7 @@ def exists( *, version: str = None, bit_depth: int = None, - channels: typing.Union[int, typing.Sequence[int]] = None, + channels: int | Sequence[int] = None, format: str = None, mixdown: bool = False, sampling_rate: int = None, @@ -367,7 +369,7 @@ def flavor_path( version: str, *, bit_depth: int = None, - channels: typing.Union[int, typing.Sequence[int]] = None, + channels: int | Sequence[int] = None, format: str = None, mixdown: bool = False, sampling_rate: int = None, @@ -453,7 +455,7 @@ def latest_version( def remove_media( name: str, - files: typing.Union[str, typing.Sequence[str]], + files: str | Sequence[str], *, verbose: bool = False, ): @@ -560,7 +562,7 @@ def repository( def versions( name: str, -) -> typing.List[str]: +) -> list[str]: r"""Available versions of database. Args: diff --git a/audb/core/config.py b/audb/core/config.py index 0a4a5b08..c83d9202 100644 --- a/audb/core/config.py +++ b/audb/core/config.py @@ -27,7 +27,7 @@ def load_configuration_file(config_file: str): """ if os.path.exists(config_file): - with open(config_file, "r") as cf: + with open(config_file) as cf: config = yaml.load(cf, Loader=yaml.BaseLoader) else: config = {} diff --git a/audb/core/dependencies.py b/audb/core/dependencies.py index c6f83f76..57aedd48 100644 --- a/audb/core/dependencies.py +++ b/audb/core/dependencies.py @@ -1,8 +1,11 @@ +from __future__ import annotations + +from collections.abc import Callable +from collections.abc import Sequence import errno import os import re import tempfile -import typing import pandas as pd import pyarrow as pa @@ -110,7 +113,7 @@ def __eq__(self, other: "Dependencies") -> bool: """ return self._df.equals(other._df) - def __getitem__(self, file: str) -> typing.List: + def __getitem__(self, file: str) -> list: r"""File information. Args: @@ -130,7 +133,7 @@ def __str__(self) -> str: # noqa: D105 return str(self._df) @property - def archives(self) -> typing.List[str]: + def archives(self) -> list[str]: r"""All media, table, attachment archives. Return: @@ -140,7 +143,7 @@ def archives(self) -> typing.List[str]: return sorted(self._df.archive.unique().tolist()) @property - def attachments(self) -> typing.List[str]: + def attachments(self) -> list[str]: r"""Attachment paths (can be a file or a folder). Returns: @@ -152,7 +155,7 @@ def attachments(self) -> typing.List[str]: ].index.tolist() @property - def attachment_ids(self) -> typing.List[str]: + def attachment_ids(self) -> list[str]: r"""Attachment IDs. Returns: @@ -164,7 +167,7 @@ def attachment_ids(self) -> typing.List[str]: ].archive.tolist() @property - def files(self) -> typing.List[str]: + def files(self) -> list[str]: r"""All media, table, attachments. Returns: @@ -174,7 +177,7 @@ def files(self) -> typing.List[str]: return self._df.index.tolist() @property - def media(self) -> typing.List[str]: + def media(self) -> list[str]: r"""Media files. Returns: @@ -186,7 +189,7 @@ def media(self) -> typing.List[str]: ].index.tolist() @property - def removed_media(self) -> typing.List[str]: + def removed_media(self) -> list[str]: r"""Removed media files. Returns: @@ -199,7 +202,7 @@ def removed_media(self) -> typing.List[str]: ].index.tolist() @property - def table_ids(self) -> typing.List[str]: + def table_ids(self) -> list[str]: r"""Table IDs. Like :meth:`audb.Dependencies.tables`, @@ -213,7 +216,7 @@ def table_ids(self) -> typing.List[str]: return [os.path.splitext(table[3:])[0] for table in self.tables] @property - def tables(self) -> typing.List[str]: + def tables(self) -> list[str]: r"""Tables files. Returns: @@ -455,8 +458,8 @@ def _add_attachment( def _add_media( self, - values: typing.Sequence[ - typing.Tuple[ + values: Sequence[ + tuple[ str, # file str, # archive int, # bit_depth @@ -522,8 +525,8 @@ def _column_loc( self, column: str, file: str, - dtype: typing.Callable = None, - ) -> typing.Any: + dtype: Callable = None, + ) -> object: r"""Column content for selected file. Args: @@ -569,7 +572,7 @@ def _dataframe_to_table( table = table.rename_columns(columns) return table - def _drop(self, files: typing.Sequence[str]): + def _drop(self, files: Sequence[str]): r"""Drop files from table. Args: @@ -638,8 +641,8 @@ def _table_to_dataframe(self, table: pa.Table) -> pd.DataFrame: def _update_media( self, - values: typing.Sequence[ - typing.Tuple[ + values: Sequence[ + tuple[ str, # file str, # archive int, # bit_depth @@ -670,7 +673,7 @@ def _update_media( def _update_media_version( self, - files: typing.Sequence[str], + files: Sequence[str], version: str, ): r"""Update version of media files. @@ -685,7 +688,7 @@ def _update_media_version( def error_message_missing_object( object_type: str, - missing_object_id: typing.Union[str, typing.Sequence], + missing_object_id: str | Sequence, database_name: str = None, database_version: str = None, ) -> str: @@ -720,12 +723,12 @@ def error_message_missing_object( def filter_deps( - requested_deps: typing.Optional[typing.Union[str, typing.Sequence[str]]], - available_deps: typing.Sequence[str], + requested_deps: str | Sequence[str] | None, + available_deps: Sequence[str], deps_type: str, database_name: str = None, database_version: str = None, -) -> typing.Sequence[str]: +) -> Sequence[str]: r"""Filter dependency files by requested files. Args: @@ -778,7 +781,7 @@ def filter_deps( def download_dependencies( - backend_interface: typing.Type[audbackend.interface.Base], + backend_interface: type[audbackend.interface.Base], name: str, version: str, verbose: bool, @@ -833,7 +836,7 @@ def download_dependencies( def upload_dependencies( - backend_interface: typing.Type[audbackend.interface.Base], + backend_interface: type[audbackend.interface.Base], deps: Dependencies, db_root: str, name: str, diff --git a/audb/core/flavor.py b/audb/core/flavor.py index 5e0188a7..51ddad00 100644 --- a/audb/core/flavor.py +++ b/audb/core/flavor.py @@ -1,6 +1,8 @@ +from __future__ import annotations + +from collections.abc import Sequence import os import shutil -import typing import numpy as np @@ -53,7 +55,7 @@ def __init__( self, *, bit_depth: int = None, - channels: typing.Union[int, typing.Sequence[int]] = None, + channels: int | Sequence[int] = None, format: str = None, mixdown: bool = False, sampling_rate: int = None, @@ -150,9 +152,9 @@ def short_id( def _check_convert( self, file: str, - bit_depth: typing.Optional[int], - channels: typing.Optional[int], - sampling_rate: typing.Optional[int], + bit_depth: int | None, + channels: int | None, + sampling_rate: int | None, ) -> bool: r"""Check if file needs to be converted to flavor.""" format = audeer.file_extension(file).lower() diff --git a/audb/core/info.py b/audb/core/info.py index e1495a42..a01341f2 100644 --- a/audb/core/info.py +++ b/audb/core/info.py @@ -1,4 +1,4 @@ -import typing +from collections.abc import Sequence import pandas as pd @@ -16,7 +16,7 @@ def attachments( *, version: str = None, cache_root: str = None, -) -> typing.Dict[str, audformat.Attachment]: +) -> dict[str, audformat.Attachment]: """Attachment(s) of database. Args: @@ -77,10 +77,10 @@ def bit_depths( name: str, *, version: str = None, - tables: typing.Sequence = None, - media: typing.Sequence = None, + tables: Sequence = None, + media: Sequence = None, cache_root: str = None, -) -> typing.Set[int]: +) -> set[int]: """Media bit depth. Args: @@ -113,10 +113,10 @@ def channels( name: str, *, version: str = None, - tables: typing.Sequence = None, - media: typing.Sequence = None, + tables: Sequence = None, + media: Sequence = None, cache_root: str = None, -) -> typing.Set[int]: +) -> set[int]: """Media channels. Args: @@ -181,8 +181,8 @@ def duration( name: str, *, version: str = None, - tables: typing.Sequence = None, - media: typing.Sequence = None, + tables: Sequence = None, + media: Sequence = None, cache_root: str = None, ) -> pd.Timedelta: """Total media duration. @@ -223,7 +223,7 @@ def files( *, version: str = None, cache_root: str = None, -) -> typing.List[str]: +) -> list[str]: """Media files included in the database. Args: @@ -248,10 +248,10 @@ def formats( name: str, *, version: str = None, - tables: typing.Sequence = None, - media: typing.Sequence = None, + tables: Sequence = None, + media: Sequence = None, cache_root: str = None, -) -> typing.Set[str]: +) -> set[str]: """Media formats. Args: @@ -327,7 +327,7 @@ def languages( *, version: str = None, cache_root: str = None, -) -> typing.List[str]: +) -> list[str]: """Languages of database. Args: @@ -420,7 +420,7 @@ def media( *, version: str = None, cache_root: str = None, -) -> typing.Dict[str, audformat.Media]: +) -> dict[str, audformat.Media]: """Audio and video media of database. Args: @@ -452,7 +452,7 @@ def meta( *, version: str = None, cache_root: str = None, -) -> typing.Dict: +) -> dict: """Meta information of database. Args: @@ -484,7 +484,7 @@ def misc_tables( *, version: str = None, cache_root: str = None, -) -> typing.Dict[str, audformat.MiscTable]: +) -> dict[str, audformat.MiscTable]: """Miscellaneous tables of database. Args: @@ -546,7 +546,7 @@ def raters( *, version: str = None, cache_root: str = None, -) -> typing.Dict[str, audformat.Rater]: +) -> dict[str, audformat.Rater]: """Raters contributed to database. Args: @@ -577,10 +577,10 @@ def sampling_rates( name: str, *, version: str = None, - tables: typing.Sequence = None, - media: typing.Sequence = None, + tables: Sequence = None, + media: Sequence = None, cache_root: str = None, -) -> typing.Set[int]: +) -> set[int]: """Media sampling rates. Args: @@ -615,7 +615,7 @@ def schemes( version: str = None, load_tables: bool = True, cache_root: str = None, -) -> typing.Dict[str, audformat.Scheme]: +) -> dict[str, audformat.Scheme]: """Schemes of database. Args: @@ -680,7 +680,7 @@ def splits( *, version: str = None, cache_root: str = None, -) -> typing.Dict[str, audformat.Split]: +) -> dict[str, audformat.Split]: """Splits of database. Args: @@ -714,7 +714,7 @@ def tables( *, version: str = None, cache_root: str = None, -) -> typing.Dict[str, audformat.Table]: +) -> dict[str, audformat.Table]: """Tables of database. Args: diff --git a/audb/core/load.py b/audb/core/load.py index 4059a488..5ca44835 100644 --- a/audb/core/load.py +++ b/audb/core/load.py @@ -1,6 +1,8 @@ +from __future__ import annotations + +from collections.abc import Sequence import os import shutil -import typing import filelock import pandas as pd @@ -25,14 +27,14 @@ from audb.core.utils import lookup_backend -CachedVersions = typing.Sequence[typing.Tuple[audeer.StrictVersion, str, Dependencies]] +CachedVersions = Sequence[tuple[audeer.StrictVersion, str, Dependencies]] def _cached_versions( name: str, version: str, flavor: Flavor, - cache_root: typing.Optional[str], + cache_root: str | None, ) -> CachedVersions: r"""Find other cached versions of same flavor.""" df = cached(cache_root=cache_root, name=name) @@ -72,12 +74,12 @@ def _cached_versions( def _cached_files( - files: typing.Sequence[str], + files: Sequence[str], deps: Dependencies, cached_versions: CachedVersions, - flavor: typing.Optional[Flavor], + flavor: Flavor | None, verbose: bool, -) -> (typing.Sequence[typing.Union[str, str]], typing.Sequence[str]): +) -> tuple[list[str], list[str]]: r"""Find cached files. Args: @@ -205,8 +207,8 @@ def _database_is_complete( def _files_duration( db: audformat.Database, deps: Dependencies, - files: typing.Sequence[str], - format: typing.Optional[str], + files: Sequence[str], + format: str | None, ): durs = deps().loc[files, "duration"] durs = durs[durs > 0] @@ -225,7 +227,7 @@ def _files_duration( def _get_attachments_from_cache( - attachments: typing.Sequence[str], + attachments: Sequence[str], db_root: str, db: audformat.Database, deps: Dependencies, @@ -233,7 +235,7 @@ def _get_attachments_from_cache( flavor: Flavor, num_workers: int, verbose: bool, -) -> typing.Sequence[str]: +) -> list[str]: r"""Copy files from cache. This function copies all files @@ -292,7 +294,7 @@ def job(cache_root: str, file: str): def _get_files_from_cache( - files: typing.Sequence[str], + files: Sequence[str], files_type: str, db_root: str, deps: Dependencies, @@ -300,7 +302,7 @@ def _get_files_from_cache( flavor: Flavor, num_workers: int, verbose: bool, -) -> typing.Sequence[str]: +) -> Sequence[str]: r"""Copy files from cache. This function copies requested media files @@ -376,11 +378,11 @@ def job(cache_root: str, file: str): def _get_attachments_from_backend( db: audformat.Database, - attachments: typing.Sequence[str], + attachments: Sequence[str], db_root: str, deps: Dependencies, - backend_interface: typing.Type[audbackend.interface.Base], - num_workers: typing.Optional[int], + backend_interface: type[audbackend.interface.Base], + num_workers: int | None, verbose: bool, ): r"""Load attachments from backend.""" @@ -424,12 +426,12 @@ def job(path: str): def _get_media_from_backend( name: str, - media: typing.Sequence[str], + media: Sequence[str], db_root: str, - flavor: typing.Optional[Flavor], + flavor: Flavor | None, deps: Dependencies, - backend_interface: typing.Type[audbackend.interface.Base], - num_workers: typing.Optional[int], + backend_interface: type[audbackend.interface.Base], + num_workers: int | None, verbose: bool, ): r"""Load media from backend.""" @@ -512,12 +514,12 @@ def job(archive: str, version: str): def _get_tables_from_backend( db: audformat.Database, - tables: typing.Sequence[str], + tables: Sequence[str], db_root: str, deps: Dependencies, - backend_interface: typing.Type[audbackend.interface.Base], + backend_interface: type[audbackend.interface.Base], pickle_tables: bool, - num_workers: typing.Optional[int], + num_workers: int | None, verbose: bool, ): r"""Load tables from backend. @@ -600,18 +602,18 @@ def job(table: str): def _load_attachments( - attachments: typing.Sequence[str], - backend_interface: typing.Type[audbackend.interface.Base], + attachments: Sequence[str], + backend_interface: type[audbackend.interface.Base], db_root: str, db: audformat.Database, version: str, - cached_versions: typing.Optional[CachedVersions], + cached_versions: CachedVersions | None, deps: Dependencies, flavor: Flavor, cache_root: str, num_workers: int, verbose: bool, -) -> typing.Optional[CachedVersions]: +) -> CachedVersions | None: r"""Load attachments to cache. Args: @@ -677,20 +679,20 @@ def _load_attachments( def _load_files( - files: typing.Sequence[str], + files: Sequence[str], files_type: str, - backend_interface: typing.Type[audbackend.interface.Base], + backend_interface: type[audbackend.interface.Base], db_root: str, db: audformat.Database, version: str, - cached_versions: typing.Optional[CachedVersions], + cached_versions: CachedVersions | None, deps: Dependencies, flavor: Flavor, cache_root: str, pickle_tables: bool, num_workers: int, verbose: bool, -) -> typing.Optional[CachedVersions]: +) -> CachedVersions | None: r"""Load files to cache. Loads media files, @@ -790,7 +792,7 @@ def _load_files( def _misc_tables_used_in_scheme( db: audformat.Database, -) -> typing.List[str]: +) -> list[str]: r"""List of misc tables that are used inside a scheme. Args: @@ -810,7 +812,7 @@ def _misc_tables_used_in_scheme( def _misc_tables_used_in_table( table: audformat.Table, -) -> typing.List[str]: +) -> list[str]: r"""List of misc tables that are used inside schemes of a table. Args: @@ -830,12 +832,12 @@ def _misc_tables_used_in_table( def _missing_files( - files: typing.Sequence[str], + files: Sequence[str], files_type: str, db_root: str, flavor: Flavor, verbose: bool, -) -> typing.Sequence[str]: +) -> list[str]: r"""List missing files. Checks for media files, @@ -896,7 +898,7 @@ def _update_path( db: audformat.Database, root: str, full_path: bool, - format: typing.Optional[str], + format: str | None, num_workers: int, verbose: bool, ): @@ -946,8 +948,8 @@ def job(table): def filtered_dependencies( name: str, version: str, - media: typing.Union[str, typing.Sequence[str]], - tables: typing.Union[str, typing.Sequence[str]], + media: str | Sequence[str], + tables: str | Sequence[str], cache_root: str = None, ) -> pd.DataFrame: r"""Filter media by tables. @@ -1003,21 +1005,21 @@ def load( version: str = None, only_metadata: bool = False, bit_depth: int = None, - channels: typing.Union[int, typing.Sequence[int]] = None, + channels: int | Sequence[int] = None, format: str = None, mixdown: bool = False, sampling_rate: int = None, - attachments: typing.Union[str, typing.Sequence[str]] = None, - tables: typing.Union[str, typing.Sequence[str]] = None, - media: typing.Union[str, typing.Sequence[str]] = None, + attachments: str | Sequence[str] = None, + tables: str | Sequence[str] = None, + media: str | Sequence[str] = None, removed_media: bool = False, full_path: bool = True, pickle_tables: bool = True, cache_root: str = None, - num_workers: typing.Optional[int] = 1, + num_workers: int | None = 1, timeout: float = -1, verbose: bool = True, -) -> typing.Optional[audformat.Database]: +) -> audformat.Database | None: r"""Load database. Loads meta and media files of a database to the local cache and returns @@ -1307,7 +1309,7 @@ def load_attachment( version: str = None, cache_root: str = None, verbose: bool = True, -) -> typing.List[str]: +) -> list[str]: r"""Load attachment(s) of database. Args: @@ -1429,10 +1431,7 @@ def load_header_to( flavor: Flavor = None, add_audb_meta: bool = False, overwrite: bool = False, -) -> typing.Tuple[ - audformat.Database, - typing.Optional[typing.Type[audbackend.interface.Base]], -]: +) -> tuple[audformat.Database, type[audbackend.interface.Base] | None]: r"""Load database header from folder or backend. If the database header cannot be found in ``db_root`` @@ -1484,19 +1483,19 @@ def load_header_to( def load_media( name: str, - media: typing.Union[str, typing.Sequence[str]], + media: str | Sequence[str], *, version: str = None, bit_depth: int = None, - channels: typing.Union[int, typing.Sequence[int]] = None, + channels: int | Sequence[int] = None, format: str = None, mixdown: bool = False, sampling_rate: int = None, cache_root: str = None, - num_workers: typing.Optional[int] = 1, + num_workers: int | None = 1, timeout: float = -1, verbose: bool = True, -) -> typing.Optional[typing.List]: +) -> list | None: r"""Load media file(s). If you are interested in media files @@ -1645,10 +1644,10 @@ def load_table( table: str, *, version: str = None, - map: typing.Dict[str, typing.Union[str, typing.Sequence[str]]] = None, + map: dict[str, str | Sequence[str]] = None, pickle_tables: bool = True, cache_root: str = None, - num_workers: typing.Optional[int] = 1, + num_workers: int | None = 1, verbose: bool = True, ) -> pd.DataFrame: r"""Load a database table. diff --git a/audb/core/load_to.py b/audb/core/load_to.py index e766a97a..d2dc35c8 100644 --- a/audb/core/load_to.py +++ b/audb/core/load_to.py @@ -1,5 +1,7 @@ +from __future__ import annotations + +from collections.abc import Sequence import os -import typing import audbackend import audeer @@ -17,7 +19,7 @@ def _find_attachments( db_root: str, deps: Dependencies, -) -> typing.List[str]: +) -> list[str]: r"""Find missing attachments.""" attachments = [] @@ -33,9 +35,9 @@ def _find_media( db: audformat.Database, db_root: str, deps: Dependencies, - num_workers: typing.Optional[int], + num_workers: int | None, verbose: bool, -) -> typing.List[str]: +) -> list[str]: r"""Find missing media. Collects all media files present in ``db.files``, @@ -70,9 +72,9 @@ def _find_tables( db_header: audformat.Database, db_root: str, deps: Dependencies, - num_workers: typing.Optional[int], + num_workers: int | None, verbose: bool, -) -> typing.List[str]: +) -> list[str]: r"""Find missing tables. Collects all tables and misc tables @@ -116,13 +118,13 @@ def job(table: str): def _get_attachments( - paths: typing.Sequence[str], + paths: Sequence[str], db_root: str, db_root_tmp: str, db_name: str, deps: Dependencies, - backend_interface: typing.Type[audbackend.interface.Base], - num_workers: typing.Optional[int], + backend_interface: type[audbackend.interface.Base], + num_workers: int | None, verbose: bool, ): r"""Load attachments from backend.""" @@ -159,13 +161,13 @@ def job(path: str): def _get_media( - media: typing.List[str], + media: list[str], db_root: str, db_root_tmp: str, db_name: str, deps: Dependencies, - backend_interface: typing.Type[audbackend.interface.Base], - num_workers: typing.Optional[int], + backend_interface: type[audbackend.interface.Base], + num_workers: int | None, verbose: bool, ): # create folder tree to avoid race condition @@ -203,13 +205,13 @@ def job(archive: str, version: str): def _get_tables( - tables: typing.List[str], + tables: list[str], db_root: str, db_root_tmp: str, db_name: str, deps: Dependencies, - backend_interface: typing.Type[audbackend.interface.Base], - num_workers: typing.Optional[int], + backend_interface: type[audbackend.interface.Base], + num_workers: int | None, verbose: bool, ): r"""Load table files from backend. @@ -296,7 +298,7 @@ def load_to( only_metadata: bool = False, pickle_tables: bool = True, cache_root: str = None, - num_workers: typing.Optional[int] = 1, + num_workers: int | None = 1, verbose: bool = True, ) -> audformat.Database: r"""Load database to directory. diff --git a/audb/core/lock.py b/audb/core/lock.py index bc93922b..ff5a79de 100644 --- a/audb/core/lock.py +++ b/audb/core/lock.py @@ -1,5 +1,7 @@ +from __future__ import annotations + +from collections.abc import Sequence import types -import typing import filelock @@ -11,7 +13,7 @@ class FolderLock: def __init__( self, - folders: typing.Union[str, typing.Sequence[str]], + folders: str | Sequence[str], *, timeout: float = -1, ): @@ -48,9 +50,9 @@ def __enter__(self) -> "FolderLock": def __exit__( self, - exc_type: typing.Optional[typing.Type[BaseException]], - exc_value: typing.Optional[BaseException], - traceback: typing.Optional[types.TracebackType], + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, ) -> None: """Release the lock(s).""" for lock in self.locks: diff --git a/audb/core/publish.py b/audb/core/publish.py index f3191392..9ba10385 100644 --- a/audb/core/publish.py +++ b/audb/core/publish.py @@ -1,9 +1,11 @@ +from __future__ import annotations + import collections +from collections.abc import Mapping import os import re import shutil import tempfile -import typing import audbackend import audeer @@ -42,7 +44,7 @@ def job(table_id): def _check_for_missing_media( db: audformat.Database, db_root: str, - db_root_files: typing.Set[str], + db_root_files: set[str], deps: Dependencies, ): r"""Check for media that is not in root and not in dependencies.""" @@ -77,7 +79,7 @@ def _find_attachments( version: str, deps: Dependencies, verbose: bool, -) -> typing.List[str]: +) -> list[str]: r"""Find altered, new or removed attachments and update 'deps'.""" # drop removed attachments from dependency table removed_attachments = [ @@ -162,13 +164,13 @@ def _find_attachments( def _find_media( db: audformat.Database, db_root: str, - db_root_files: typing.Set[str], + db_root_files: set[str], version: str, deps: Dependencies, - archives: typing.Mapping[str, str], + archives: Mapping[str, str], num_workers: int, verbose: bool, -) -> typing.Set[str]: +) -> set[str]: r"""Find archives with new, altered or removed media and update 'deps'.""" media_archives = set() db_media = set(db.files) @@ -258,7 +260,7 @@ def _find_tables( version: str, deps: Dependencies, verbose: bool, -) -> typing.List[str]: +) -> list[str]: r"""Find altered, new or removed tables and update 'deps'.""" table_ids = list(db) # PARQUET is default table, @@ -291,7 +293,7 @@ def _find_tables( def _get_root_files( db_root: str, -) -> typing.Set[str]: +) -> set[str]: r"""Return list of files in root directory.""" db_root_files = audeer.list_file_names( db_root, @@ -311,7 +313,7 @@ def _media_values( version: str, archive: str, checksum: str, -) -> typing.Tuple[str, str, int, int, str, float, str, int, float, int, str]: +) -> tuple[str, str, int, int, str, float, str, int, float, int, str]: r"""Return values of a media entry in dependencies. The dependency table expects the following columns: @@ -384,12 +386,12 @@ def _media_values( def _put_attachments( - attachments: typing.List[str], + attachments: list[str], db_root: str, db: audformat.Database, version: str, - backend_interface: typing.Type[audbackend.interface.Base], - num_workers: typing.Optional[int], + backend_interface: type[audbackend.interface.Base], + num_workers: int | None, verbose: bool, ): def job(attachment_id: str): @@ -410,14 +412,14 @@ def job(attachment_id: str): def _put_media( - media_archives: typing.Set[str], + media_archives: set[str], db_root: str, db_name: str, version: str, - previous_version: typing.Optional[str], + previous_version: str | None, deps: Dependencies, - backend_interface: typing.Type[audbackend.interface.Base], - num_workers: typing.Optional[int], + backend_interface: type[audbackend.interface.Base], + num_workers: int | None, verbose: bool, ): r"""Upload archives with new, altered or removed media files.""" @@ -485,12 +487,12 @@ def job(archive): def _put_tables( - tables: typing.List[str], + tables: list[str], db_root: str, db_name: str, version: str, - backend_interface: typing.Type[audbackend.interface.Base], - num_workers: typing.Optional[int], + backend_interface: type[audbackend.interface.Base], + num_workers: int | None, verbose: bool, ): def job(table: str): @@ -520,10 +522,10 @@ def publish( version: str, repository: Repository, *, - archives: typing.Mapping[str, str] = None, - previous_version: typing.Optional[str] = "latest", + archives: Mapping[str, str] = None, + previous_version: str | None = "latest", cache_root: str = None, - num_workers: typing.Optional[int] = 1, + num_workers: int | None = 1, verbose: bool = True, ) -> Dependencies: r"""Publish database. diff --git a/audb/core/repository.py b/audb/core/repository.py index 47563fa0..cfb01597 100644 --- a/audb/core/repository.py +++ b/audb/core/repository.py @@ -1,5 +1,3 @@ -import typing - import audbackend @@ -72,7 +70,7 @@ def __repr__(self): # noqa: D105 f")" ) - def create_backend_interface(self) -> typing.Type[audbackend.interface.Base]: + def create_backend_interface(self) -> type[audbackend.interface.Base]: r"""Create backend interface to access repository. When :attr:`Repository.backend` equals ``artifactory``, @@ -127,7 +125,7 @@ def create_backend_interface(self) -> typing.Type[audbackend.interface.Base]: def register( cls, backend_name: str, - backend_class: typing.Type[audbackend.backend.Base], + backend_class: type[audbackend.backend.Base], ): r"""Register backend class. diff --git a/audb/core/stream.py b/audb/core/stream.py index 4c78b5ae..759fa95c 100644 --- a/audb/core/stream.py +++ b/audb/core/stream.py @@ -1,8 +1,9 @@ from __future__ import annotations import abc +from collections.abc import Iterable +from collections.abc import Sequence import os -import typing import pandas as pd import pyarrow as pa @@ -112,19 +113,19 @@ def __init__( table: str, *, version: str, - map: typing.Dict[str, typing.Union[str, typing.Sequence[str]]], + map: dict[str, str | Sequence[str]], batch_size: int, shuffle: bool, buffer_size: int, only_metadata: bool, bit_depth: int, - channels: typing.Union[int, typing.Sequence[int]], + channels: int | Sequence[int], format: str, mixdown: bool, sampling_rate: int, full_path: bool, cache_root: str, - num_workers: typing.Optional[int], + num_workers: int | None, timeout: float, verbose: bool, ): @@ -189,7 +190,7 @@ def __next__(self) -> pd.DataFrame: return df @abc.abstractmethod - def _initialize_stream(self) -> typing.Iterable: + def _initialize_stream(self) -> Iterable: r"""Create table iterator object. This method needs to be implemented @@ -306,7 +307,7 @@ def _load_media(self, df: pd.DataFrame): verbose=self._verbose, ) - def _postprocess_batch(self, batch: typing.Any) -> pd.DataFrame: + def _postprocess_batch(self, batch: object) -> pd.DataFrame: r"""Post-process batch data to desired dataframe. Args: @@ -414,19 +415,19 @@ def stream( table: str, *, version: str = None, - map: typing.Dict[str, typing.Union[str, typing.Sequence[str]]] = None, + map: dict[str, str | Sequence[str]] = None, batch_size: int = 16, shuffle: bool = False, buffer_size: int = 100_000, only_metadata: bool = False, bit_depth: int = None, - channels: typing.Union[int, typing.Sequence[int]] = None, + channels: int | Sequence[int] = None, format: str = None, mixdown: bool = False, sampling_rate: int = None, full_path: bool = True, cache_root: str = None, - num_workers: typing.Optional[int] = 1, + num_workers: int | None = 1, timeout: float = -1, verbose: bool = True, ) -> DatabaseIterator: @@ -535,7 +536,7 @@ def stream( # Extract kwargs # to pass on to the DatabaseIterator constructor - kwargs = dict((k, v) for (k, v) in locals().items() if k not in ["name", "table"]) + kwargs = {k: v for (k, v) in locals().items() if k not in ["name", "table"]} flavor = Flavor( bit_depth=bit_depth, diff --git a/audb/core/utils.py b/audb/core/utils.py index 1fe71d41..90fc56f2 100644 --- a/audb/core/utils.py +++ b/audb/core/utils.py @@ -1,5 +1,5 @@ +from collections.abc import Sequence import os -import typing import warnings import pyarrow.parquet as parquet @@ -15,7 +15,7 @@ def lookup_backend( name: str, version: str, -) -> typing.Type[audbackend.interface.Base]: +) -> type[audbackend.interface.Base]: r"""Return backend of requested database. If the database is stored in several repositories, @@ -67,7 +67,7 @@ def md5(file: str) -> str: def mkdir_tree( - files: typing.Sequence[str], + files: Sequence[str], root: str, ): r"""Helper function to create folder tree.""" @@ -81,7 +81,7 @@ def mkdir_tree( def _lookup( name: str, version: str, -) -> typing.Tuple[Repository, typing.Type[audbackend.interface.Base]]: +) -> tuple[Repository, type[audbackend.interface.Base]]: r"""Helper function to look up database in all repositories. Returns repository, version and backend object. diff --git a/benchmarks/dependencies_polars.py b/benchmarks/dependencies_polars.py index 8363f694..960e5dc1 100644 --- a/benchmarks/dependencies_polars.py +++ b/benchmarks/dependencies_polars.py @@ -1,8 +1,11 @@ +from __future__ import annotations + +from collections.abc import Callable +from collections.abc import Sequence import errno import os import re import tempfile -import typing import pandas as pd import polars as pl @@ -135,7 +138,7 @@ def __eq__(self, other: "Dependencies") -> bool: """ return self._df.equals(other._df) - def __getitem__(self, file: str) -> typing.List: + def __getitem__(self, file: str) -> list: r"""File information. Args: @@ -166,7 +169,7 @@ def __str__(self) -> str: # noqa: D105 return str_repr @property - def archives(self) -> typing.List[str]: + def archives(self) -> list[str]: r"""All media, table, attachment archives. Return: @@ -213,7 +216,7 @@ def archives(self) -> typing.List[str]: # ) @property - def attachments(self) -> typing.List[str]: + def attachments(self) -> list[str]: r"""Attachment paths (can be a file or a folder). Returns: @@ -224,7 +227,7 @@ def attachments(self) -> typing.List[str]: return self._df.filter(pl.col("type") == deptype)[self.index_col].to_list() @property - def attachment_ids(self) -> typing.List[str]: + def attachment_ids(self) -> list[str]: r"""Attachment IDs. Returns: @@ -236,7 +239,7 @@ def attachment_ids(self) -> typing.List[str]: ].to_list() @property - def files(self) -> typing.List[str]: + def files(self) -> list[str]: r"""All media, table, attachments. Returns: @@ -246,7 +249,7 @@ def files(self) -> typing.List[str]: return list(self._idx.keys()) @property - def media(self) -> typing.List[str]: + def media(self) -> list[str]: r"""Media files. Returns: @@ -258,7 +261,7 @@ def media(self) -> typing.List[str]: ].to_list() @property - def removed_media(self) -> typing.List[str]: + def removed_media(self) -> list[str]: r"""Removed media files. Returns: @@ -270,7 +273,7 @@ def removed_media(self) -> typing.List[str]: )[self.index_col].to_list() @property - def table_ids(self) -> typing.List[str]: + def table_ids(self) -> list[str]: r"""Table IDs. Like :meth:`audb.Dependencies.tables`, @@ -284,7 +287,7 @@ def table_ids(self) -> typing.List[str]: return [table[3:-4] for table in self.tables] @property - def tables(self) -> typing.List[str]: + def tables(self) -> list[str]: r"""Tables files. Returns: @@ -562,8 +565,8 @@ def _add_attachment( def _add_media( self, - values: typing.Sequence[ - typing.Tuple[ + values: Sequence[ + tuple[ str, # file str, # archive int, # bit_depth @@ -637,8 +640,8 @@ def _column_loc( self, column: str, file: str, - dtype: typing.Callable = None, - ) -> typing.Any: + dtype: Callable = None, + ) -> object: r"""Column content for selected files and column.""" value = self._df.row(self._idx[file], named=True)[column] if dtype is not None: @@ -674,7 +677,7 @@ def _dataframe_to_table( table = table.rename_columns(columns) return table - def _drop(self, files: typing.Sequence[str]): + def _drop(self, files: Sequence[str]): r"""Drop files from table. Args: @@ -736,8 +739,8 @@ def _table_to_dataframe(self, table: pa.Table) -> pd.DataFrame: def _update_media( self, - values: typing.Sequence[ - typing.Tuple[ + values: Sequence[ + tuple[ str, # file str, # archive int, # bit_depth @@ -781,7 +784,7 @@ def _update_media( def _update_media_version( self, - files: typing.Sequence[str], + files: Sequence[str], version: str, ): r"""Update version of media files. @@ -808,7 +811,7 @@ def _update_media_version( def error_message_missing_object( object_type: str, - missing_object_id: typing.Union[str, typing.Sequence], + missing_object_id: str | Sequence, database_name: str = None, database_version: str = None, ) -> str: @@ -843,12 +846,12 @@ def error_message_missing_object( def filter_deps( - requested_deps: typing.Optional[typing.Union[str, typing.Sequence[str]]], - available_deps: typing.Sequence[str], + requested_deps: str | Sequence[str] | None, + available_deps: Sequence[str], deps_type: str, database_name: str = None, database_version: str = None, -) -> typing.Sequence[str]: +) -> Sequence[str]: r"""Filter dependency files by requested files. Args: @@ -901,7 +904,7 @@ def filter_deps( def download_dependencies( - backend_interface: typing.Type[audbackend.interface.Base], + backend_interface: type[audbackend.interface.Base], name: str, version: str, verbose: bool, @@ -956,7 +959,7 @@ def download_dependencies( def upload_dependencies( - backend_interface: typing.Type[audbackend.interface.Base], + backend_interface: type[audbackend.interface.Base], deps: Dependencies, db_root: str, name: str, diff --git a/tests/test_info.py b/tests/test_info.py index af7059d9..be16b558 100644 --- a/tests/test_info.py +++ b/tests/test_info.py @@ -122,16 +122,16 @@ def test_header(db): def test_bit_depths(): deps = audb.dependencies(DB_NAME, version=DB_VERSION) - assert audb.info.bit_depths(DB_NAME) == set( - [deps.bit_depth(file) for file in deps.media if deps.bit_depth(file)] - ) + assert audb.info.bit_depths(DB_NAME) == { + deps.bit_depth(file) for file in deps.media if deps.bit_depth(file) + } def test_channels(): deps = audb.dependencies(DB_NAME, version=DB_VERSION) - assert audb.info.channels(DB_NAME) == set( - [deps.channels(file) for file in deps.media if deps.channels(file)] - ) + assert audb.info.channels(DB_NAME) == { + deps.channels(file) for file in deps.media if deps.channels(file) + } def test_description(db): @@ -178,7 +178,7 @@ def test_duration(tables, media): def test_formats(): deps = audb.dependencies(DB_NAME, version=DB_VERSION) - assert audb.info.formats(DB_NAME) == set([deps.format(file) for file in deps.media]) + assert audb.info.formats(DB_NAME) == {deps.format(file) for file in deps.media} def test_languages(db): @@ -215,9 +215,9 @@ def test_raters(db): def test_sampling_rates(): deps = audb.dependencies(DB_NAME, version=DB_VERSION) - assert audb.info.sampling_rates(DB_NAME) == set( - [deps.sampling_rate(file) for file in deps.media if deps.sampling_rate(file)] - ) + assert audb.info.sampling_rates(DB_NAME) == { + deps.sampling_rate(file) for file in deps.media if deps.sampling_rate(file) + } def test_schemes(db): diff --git a/tests/test_publish.py b/tests/test_publish.py index 21bc4a00..0e886a53 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -411,7 +411,7 @@ def test_publish(tmpdir, dbs, persistent_repository, version): number_of_media_files_in_custom_archives = len(set(archives.keys())) number_of_custom_archives = len(set(archives.values())) number_of_media_files = len(deps.media) - number_of_media_archives = len(set([deps.archive(file) for file in deps.media])) + number_of_media_archives = len({deps.archive(file) for file in deps.media}) assert (number_of_media_files_in_custom_archives - number_of_custom_archives) == ( number_of_media_files - number_of_media_archives ) diff --git a/tests/test_publish_table.py b/tests/test_publish_table.py index f17761a8..6ad7e3f0 100644 --- a/tests/test_publish_table.py +++ b/tests/test_publish_table.py @@ -1,5 +1,5 @@ +from collections.abc import Sequence import os -import typing import numpy as np import pyarrow.parquet as parquet @@ -125,7 +125,7 @@ def repo_path(*args): def assert_db_saved_to_dir( db: audformat.Database, root: str, - storage_formats: typing.Sequence[str], + storage_formats: Sequence[str], ): r"""Assert all database files are stored to the build dir. diff --git a/tests/test_stream.py b/tests/test_stream.py index 2810dcb0..0acde92a 100644 --- a/tests/test_stream.py +++ b/tests/test_stream.py @@ -1,5 +1,4 @@ import os -import typing import numpy as np import pandas as pd @@ -237,8 +236,8 @@ def test_buffer_size(self, table: str, batch_size: int, buffer_size: int): def test_db_cleanup( self, table: str, - expected_tables: typing.List, - expected_schemes: typing.List, + expected_tables: list, + expected_schemes: list, ): r"""Test removal of non-selected tables and schemes. @@ -298,7 +297,7 @@ def test_full_path(self, full_path: bool): ("files", {"speaker": "year-of-birth"}), ], ) - def test_map(self, table: str, map: typing.Dict): + def test_map(self, table: str, map: dict): r"""Test mapping of scheme labels. Args: