From 60ae524509f04399ab0e1fb0ec8dbf1ee209e73e Mon Sep 17 00:00:00 2001 From: "Thomas J. Zajac" Date: Wed, 17 Jul 2024 09:08:44 +0000 Subject: [PATCH] Current (finished?) state with hexkit issues --- .../src/fins/adapters/inbound/event_sub.py | 6 +- .../fins/src/fins/core/information_service.py | 23 ++- services/fins/src/fins/core/models.py | 2 +- .../fins/ports/inbound/information_service.py | 14 +- services/fins/src/fins/py.typed | 1 + services/fins/tests_fins/fixtures/config.py | 41 +++++ services/fins/tests_fins/fixtures/joint.py | 148 ++++++++++++++++++ .../fins/tests_fins/fixtures/test_config.yaml | 11 ++ services/fins/tests_fins/fixtures/utils.py | 20 +++ .../fins/tests_fins/test_typical_journey.py | 118 ++++++++++++++ 10 files changed, 366 insertions(+), 18 deletions(-) create mode 100644 services/fins/src/fins/py.typed create mode 100644 services/fins/tests_fins/fixtures/config.py create mode 100644 services/fins/tests_fins/fixtures/joint.py create mode 100644 services/fins/tests_fins/fixtures/test_config.yaml create mode 100644 services/fins/tests_fins/fixtures/utils.py create mode 100644 services/fins/tests_fins/test_typical_journey.py diff --git a/services/fins/src/fins/adapters/inbound/event_sub.py b/services/fins/src/fins/adapters/inbound/event_sub.py index ba85c55d..d6f457fa 100644 --- a/services/fins/src/fins/adapters/inbound/event_sub.py +++ b/services/fins/src/fins/adapters/inbound/event_sub.py @@ -92,7 +92,7 @@ async def _consume_file_internally_registered(self, *, payload: JsonObject): ) await self._information_service.register_information( - file_information=validated_payload + file_registered=validated_payload ) @@ -126,9 +126,7 @@ def __init__( async def changed( self, resource_id: str, update: event_schemas.FileDeletionRequested ) -> None: - """Consume change event for File Deletion Requests. - Idempotence is handled by the core, so no intermediary is required. - """ + """Consume change event for File Deletion Requests.""" await self.information_service.deletion_requested(file_id=update.file_id) async def deleted(self, resource_id: str) -> None: diff --git a/services/fins/src/fins/core/information_service.py b/services/fins/src/fins/core/information_service.py index 6be9f41c..50803235 100644 --- a/services/fins/src/fins/core/information_service.py +++ b/services/fins/src/fins/core/information_service.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""TODO""" +"""Contains logic for public file information storage, retrieval and deletion.""" import logging @@ -40,7 +40,8 @@ async def deletion_requested(self, file_id: str): await self._file_information_dao.get_by_id(id_=file_id) except ResourceNotFoundError: log.info( - f"Information for file with id '{file_id}' has already been deleted." + f"Information for file with id '{ + file_id}' has already been deleted." ) return @@ -56,19 +57,27 @@ async def register_information( size=file_registered.decrypted_size, sha256_hash=file_registered.decrypted_sha256, ) + file_id = file_information.file_id try: await self._file_information_dao.insert(file_information) + log.debug(f"Sucessfully inserted information for file {file_id} ") except ResourceAlreadyExistsError: - information_exists = self.InformationAlreadyRegistered( - file_id=file_information.file_id - ) - log.warning(information_exists) + existing_information = self._file_information_dao.get_by_id(id_=file_id) + log.debug(f"Found existing information for file {file_id}") + # Only log if information to be inserted is a mismatch + if existing_information != file_information: + information_exists = self.MismatchingInformationAlreadyRegistered( + file_id=file_id + ) + log.error(information_exists) async def serve_information(self, file_id: str) -> FileInformation: - """Retrieve stored public information for the five file ID.""" + """Retrieve stored public information for the five file ID to be served by API.""" try: file_information = await self._file_information_dao.get_by_id(file_id) + log.debug(f"Information for file { + file_information.file_id} has been served.") except ResourceNotFoundError as error: information_not_found = self.InformationNotFoundError(file_id=file_id) log.warning(information_not_found) diff --git a/services/fins/src/fins/core/models.py b/services/fins/src/fins/core/models.py index ee6dbb09..8489b561 100644 --- a/services/fins/src/fins/core/models.py +++ b/services/fins/src/fins/core/models.py @@ -36,4 +36,4 @@ class FileInformation(BaseModel): class FileDeletionRequested(event_schemas.FileDeletionRequested): - """TODO""" + """Internal event_schema alias for DAO/DTO modelling purposes.""" diff --git a/services/fins/src/fins/ports/inbound/information_service.py b/services/fins/src/fins/ports/inbound/information_service.py index e45233b9..b3edfb0e 100644 --- a/services/fins/src/fins/ports/inbound/information_service.py +++ b/services/fins/src/fins/ports/inbound/information_service.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""TODO""" +"""Contains interfaces for public file information storage, retrieval and deletion.""" from abc import ABC, abstractmethod @@ -26,18 +26,20 @@ class InformationServicePort(ABC): metadata for files registered with the Internal File Registry service. """ - class InformationAlreadyRegistered(RuntimeError): + class MismatchingInformationAlreadyRegistered(RuntimeError): """Raised when information for a given file ID is not registered.""" def __init__(self, *, file_id: str): - message = f"Information for the file with ID {file_id} has already been registered." + message = f"Mismatching information for the file with ID { + file_id} has already been registered." super().__init__(message) class InformationNotFoundError(RuntimeError): """Raised when information for a given file ID is not registered.""" def __init__(self, *, file_id: str): - message = f"Information for the file with ID {file_id} is not registered." + message = f"Information for the file with ID { + file_id} is not registered." super().__init__(message) @abstractmethod @@ -46,10 +48,10 @@ async def deletion_requested(self, file_id: str): @abstractmethod async def register_information( - self, file_information: event_schemas.FileInternallyRegistered + self, file_registered: event_schemas.FileInternallyRegistered ): """Store information for a file newly registered with the Internal File Registry.""" @abstractmethod async def serve_information(self, file_id: str) -> FileInformation: - """Retrieve stored public information for the five file ID.""" + """Retrieve stored public information for the five file ID to be served by API.""" diff --git a/services/fins/src/fins/py.typed b/services/fins/src/fins/py.typed new file mode 100644 index 00000000..d3245e74 --- /dev/null +++ b/services/fins/src/fins/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. This package uses inline types. diff --git a/services/fins/tests_fins/fixtures/config.py b/services/fins/tests_fins/fixtures/config.py new file mode 100644 index 00000000..2ce822eb --- /dev/null +++ b/services/fins/tests_fins/fixtures/config.py @@ -0,0 +1,41 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test config""" + +from pathlib import Path + +from pydantic_settings import BaseSettings + +from fins.config import Config +from tests_fins.fixtures.utils import BASE_DIR + +TEST_CONFIG_YAML = BASE_DIR / "test_config.yaml" + + +def get_config( + sources: list[BaseSettings] | None = None, + default_config_yaml: Path = TEST_CONFIG_YAML, +) -> Config: + """Merges parameters from the default TEST_CONFIG_YAML with params inferred + from testcontainers. + """ + sources_dict: dict[str, object] = {} + + if sources is not None: + for source in sources: + sources_dict.update(**source.model_dump()) + + return Config(config_yaml=default_config_yaml, **sources_dict) diff --git a/services/fins/tests_fins/fixtures/joint.py b/services/fins/tests_fins/fixtures/joint.py new file mode 100644 index 00000000..798bd0a4 --- /dev/null +++ b/services/fins/tests_fins/fixtures/joint.py @@ -0,0 +1,148 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Join the functionality of all fixtures for API-level integration testing.""" + +__all__ = [ + "FILE_INFORMATION_MOCK", + "INCOMING_PAYLOAD_MOCK", + "joint_fixture", + "JointFixture", + "kafka_container_fixture", + "kafka_fixture", + "mongodb_container_fixture", + "mongodb_fixture", +] +from collections.abc import AsyncGenerator +from dataclasses import dataclass + +import ghga_event_schemas.pydantic_ as event_schemas +import httpx +import pytest_asyncio +from ghga_service_commons.api.testing import AsyncTestClient +from ghga_service_commons.utils.utc_dates import now_as_utc +from hexkit.providers.akafka import KafkaEventSubscriber, KafkaOutboxSubscriber +from hexkit.providers.akafka.testutils import ( + KafkaFixture, + kafka_container_fixture, + kafka_fixture, +) +from hexkit.providers.mongodb import MongoDbDaoFactory +from hexkit.providers.mongodb.testutils import ( + MongoDbFixture, + mongodb_container_fixture, + mongodb_fixture, +) + +from fins.adapters.inbound.dao import ( + get_file_deletion_requested_dao, + get_file_information_dao, +) +from fins.config import Config +from fins.core import models +from fins.inject import ( + prepare_core, + prepare_event_subscriber, + prepare_outbox_subscriber, + prepare_rest_app, +) +from fins.ports.inbound.dao import FileDeletionRequestedDaoPort, FileInformationDaoPort +from fins.ports.inbound.information_service import InformationServicePort +from tests_fins.fixtures.config import get_config + +FILE_ID = "test-file" +DECRYPTED_SHA256 = "fake-checksum" +DECRYPTED_SIZE = 12345678 + +INCOMING_PAYLOAD_MOCK = event_schemas.FileInternallyRegistered( + s3_endpoint_alias="test-node", + file_id=FILE_ID, + object_id="test-object", + bucket_id="test-bucket", + upload_date=now_as_utc().isoformat(), + decrypted_size=DECRYPTED_SIZE, + decrypted_sha256=DECRYPTED_SHA256, + encrypted_part_size=1, + encrypted_parts_md5=["some", "checksum"], + encrypted_parts_sha256=["some", "checksum"], + content_offset=1234, + decryption_secret_id="some-secret", +) + +FILE_INFORMATION_MOCK = models.FileInformation( + file_id=FILE_ID, sha256_hash=DECRYPTED_SHA256, size=DECRYPTED_SIZE +) + + +@dataclass +class JointFixture: + """Returned by the `joint_fixture`.""" + + config: Config + information_service: InformationServicePort + file_information_dao: FileInformationDaoPort + file_deletion_requested_dao: FileDeletionRequestedDaoPort + rest_client: httpx.AsyncClient + event_subscriber: KafkaEventSubscriber + outbox_subscriber: KafkaOutboxSubscriber + mongodb: MongoDbFixture + kafka: KafkaFixture + + +@pytest_asyncio.fixture +async def joint_fixture( + mongodb: MongoDbFixture, + kafka: KafkaFixture, +) -> AsyncGenerator[JointFixture, None]: + """A fixture that embeds all other fixtures for API-level integration testing""" + config = get_config( + sources=[ + mongodb.config, + kafka.config, + ] + ) + + dao_factory = MongoDbDaoFactory(config=config) + file_information_dao = await get_file_information_dao(dao_factory=dao_factory) + file_deletion_requested_dao = await get_file_deletion_requested_dao( + dao_factory=dao_factory + ) + + # prepare everything except the outbox subscriber + async with ( + prepare_core(config=config) as information_service, + prepare_rest_app( + config=config, information_service_override=information_service + ) as app, + prepare_event_subscriber( + config=config, information_service_override=information_service + ) as event_subscriber, + prepare_outbox_subscriber( + config=config, + information_service_override=information_service, + ) as outbox_subscriber, + AsyncTestClient(app=app) as rest_client, + ): + yield JointFixture( + config=config, + information_service=information_service, + file_deletion_requested_dao=file_deletion_requested_dao, + file_information_dao=file_information_dao, + rest_client=rest_client, + event_subscriber=event_subscriber, + outbox_subscriber=outbox_subscriber, + mongodb=mongodb, + kafka=kafka, + ) diff --git a/services/fins/tests_fins/fixtures/test_config.yaml b/services/fins/tests_fins/fixtures/test_config.yaml new file mode 100644 index 00000000..e4c5af07 --- /dev/null +++ b/services/fins/tests_fins/fixtures/test_config.yaml @@ -0,0 +1,11 @@ +service_name: fins + +service_instance_id: '1' +kafka_servers: ["kafka:9092"] + +db_connection_str: "mongodb://mongodb:27017" +db_name: "dev_db" + +files_to_delete_topic: file_deletions +file_registered_event_topic: internal_file_registry +file_registered_event_type: file_registered diff --git a/services/fins/tests_fins/fixtures/utils.py b/services/fins/tests_fins/fixtures/utils.py new file mode 100644 index 00000000..85997a97 --- /dev/null +++ b/services/fins/tests_fins/fixtures/utils.py @@ -0,0 +1,20 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""General testing utilities""" + +from pathlib import Path + +BASE_DIR = Path(__file__).parent.resolve() diff --git a/services/fins/tests_fins/test_typical_journey.py b/services/fins/tests_fins/test_typical_journey.py new file mode 100644 index 00000000..e3fa5d6d --- /dev/null +++ b/services/fins/tests_fins/test_typical_journey.py @@ -0,0 +1,118 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests typical user journeys""" + +import logging + +import ghga_event_schemas.pydantic_ as event_schemas +import pytest +from hexkit.protocols.dao import ResourceNotFoundError + +from fins.core.models import FileInformation +from tests_fins.fixtures.joint import ( + FILE_INFORMATION_MOCK, + INCOMING_PAYLOAD_MOCK, + JointFixture, + joint_fixture, # noqa: F401 + kafka_container_fixture, # noqa: F401 + kafka_fixture, # noqa: F401 + mongodb_container_fixture, # noqa: F401 + mongodb_fixture, # noqa: F401 +) + +pytestmark = pytest.mark.asyncio() + +CHANGED_TYPE = "upserted" + + +async def test_normal_journey( + joint_fixture: JointFixture, # noqa: F811 + caplog, +): + """Simulates a typical, successful API journey.""" + # Test population path + file_id = INCOMING_PAYLOAD_MOCK.file_id + + await joint_fixture.kafka.publish_event( + payload=INCOMING_PAYLOAD_MOCK.model_dump(), + type_=joint_fixture.config.file_registered_event_type, + topic=joint_fixture.config.file_registered_event_topic, + ) + await joint_fixture.event_subscriber.run(forever=False) + + file_information = await joint_fixture.file_information_dao.get_by_id(file_id) + assert file_information == FILE_INFORMATION_MOCK + + # Test reregistration of identical content + expected_message = f"Found existing information for file {file_id}" + + caplog.clear() + with caplog.at_level(level=logging.DEBUG, logger="fins.core.information_service"): + await joint_fixture.kafka.publish_event( + payload=INCOMING_PAYLOAD_MOCK.model_dump(), + type_=joint_fixture.config.file_registered_event_type, + topic=joint_fixture.config.file_registered_event_topic, + ) + await joint_fixture.event_subscriber.run(forever=False) + assert len(caplog.messages) == 1 + assert expected_message in caplog.messages + + # Test reregistration of mismatching content + mismatch_message = f"Mismatching information for the file with ID { + file_id} has already been registered." + mismatch_mock = INCOMING_PAYLOAD_MOCK.model_copy( + update={"decrypted_sha256": "other-fake-checksum"} + ) + + caplog.clear() + with caplog.at_level(level=logging.DEBUG, logger="fins.core.information_service"): + await joint_fixture.kafka.publish_event( + payload=mismatch_mock.model_dump(), + type_=joint_fixture.config.file_registered_event_type, + topic=joint_fixture.config.file_registered_event_topic, + ) + await joint_fixture.event_subscriber.run(forever=False) + assert len(caplog.messages) == 2 + assert expected_message in caplog.messages + assert mismatch_message in caplog.messages + + # Test requesting existing file information + base_url = f"{joint_fixture.config.api_root_path}/file_information" + url = f"{base_url}/{file_id}" + response = await joint_fixture.rest_client.get(url) + assert response.status_code == 200 + assert FileInformation(**response.json()) == FILE_INFORMATION_MOCK + + # Test requesting invalid file information + url = f"{base_url}/invalid" + response = await joint_fixture.rest_client.get(url) + assert response.status_code == 404 + + # requst deletion + deletion_requested = event_schemas.FileDeletionRequested(file_id=file_id) + + await joint_fixture.kafka.publish_event( + payload=deletion_requested.model_dump(), + type_=CHANGED_TYPE, + topic=joint_fixture.config.files_to_delete_topic, + ) + await joint_fixture.outbox_subscriber.run(forever=False) + + # assert information is gone + with pytest.raises(ResourceNotFoundError): + file_information = await joint_fixture.file_information_dao.get_by_id( + id_=file_id + )