From 71c41a9d575152327f202ed145dc6902a1719edb Mon Sep 17 00:00:00 2001 From: PagesCoffy Date: Thu, 3 Oct 2024 14:07:05 +0000 Subject: [PATCH] [Integration][GitLab] - Fix Infinite Loop When Syncing Folders (#1061) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description What - The Gitlab folder kind had a bug where it get stuck in infinite loop, and the same data get returned for every page index. Upon investigation, it was discovered that the pagination parameters, especially the [keyset pagination](https://docs.gitlab.com/ee/api/rest/index.html#supported-resources) was behind this error since the docs does not provide options for controlling the pagination on the repository tree endpoint. Why - How - This was resolved by using the standard offset pagination where we pass the page index and page size to the repository tree API ## Type of change Please leave one option from the following and delete the rest: - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] New Integration (non-breaking change which adds a new integration) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Non-breaking change (fix of existing functionality that will not change current behavior) - [ ] Documentation (added/updated documentation)

All tests should be run against the port production environment(using a testing org).

### Core testing checklist - [ ] Integration able to create all default resources from scratch - [ ] Resync finishes successfully - [ ] Resync able to create entities - [ ] Resync able to update entities - [ ] Resync able to detect and delete entities - [ ] Scheduled resync able to abort existing resync and start a new one - [ ] Tested with at least 2 integrations from scratch - [ ] Tested with Kafka and Polling event listeners - [ ] Tested deletion of entities that don't pass the selector ### Integration testing checklist - [ ] Integration able to create all default resources from scratch - [ ] Resync able to create entities - [ ] Resync able to update entities - [ ] Resync able to detect and delete entities - [ ] Resync finishes successfully - [ ] If new resource kind is added or updated in the integration, add example raw data, mapping and expected result to the `examples` folder in the integration directory. - [ ] If resource kind is updated, run the integration with the example data and check if the expected result is achieved - [ ] If new resource kind is added or updated, validate that live-events for that resource are working as expected - [ ] Docs PR link [here](#) ### Preflight checklist - [ ] Handled rate limiting - [ ] Handled pagination - [ ] Implemented the code in async - [ ] Support Multi account ## Screenshots Screenshot 2024-10-02 at 4 52 11 PM Screenshot 2024-10-02 at 4 52 05 PM ## API Documentation Provide links to the API documentation used for this integration. --- integrations/gitlab/CHANGELOG.md | 8 + .../gitlab_integration/gitlab_service.py | 3 - integrations/gitlab/pyproject.toml | 2 +- .../test_gitlab_service_folder.py | 146 ++++++++++++++++++ 4 files changed, 155 insertions(+), 4 deletions(-) create mode 100644 integrations/gitlab/tests/gitlab_integration/test_gitlab_service_folder.py diff --git a/integrations/gitlab/CHANGELOG.md b/integrations/gitlab/CHANGELOG.md index 840c8fdbe7..2fc6096b0c 100644 --- a/integrations/gitlab/CHANGELOG.md +++ b/integrations/gitlab/CHANGELOG.md @@ -7,6 +7,14 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm +0.1.129 (2024-10-02) +==================== + +### Bug Fixes + +- Removed keyset pagination parameters from the listing of repository tree so the application can paginate data using the standard page index and page size parameters in the AsyncFetcher.fetch_batch (0.1.129) + + 0.1.128 (2024-10-02) ==================== diff --git a/integrations/gitlab/gitlab_integration/gitlab_service.py b/integrations/gitlab/gitlab_integration/gitlab_service.py index 05e0657db1..db11e3b7f3 100644 --- a/integrations/gitlab/gitlab_integration/gitlab_service.py +++ b/integrations/gitlab/gitlab_integration/gitlab_service.py @@ -550,9 +550,6 @@ async def get_all_folders_in_project_path( validation_func=self.validate_file_is_directory, path=folder_selector.path, ref=branch, - pagination="keyset", - order_by="id", - sort="asc", ): repository_tree_files: List[dict[str, Any]] = typing.cast( List[dict[str, Any]], repository_tree_batch diff --git a/integrations/gitlab/pyproject.toml b/integrations/gitlab/pyproject.toml index 07f2da860b..67ec046239 100644 --- a/integrations/gitlab/pyproject.toml +++ b/integrations/gitlab/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "gitlab" -version = "0.1.128" +version = "0.1.129" description = "Gitlab integration for Port using Port-Ocean Framework" authors = ["Yair Siman-Tov "] diff --git a/integrations/gitlab/tests/gitlab_integration/test_gitlab_service_folder.py b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service_folder.py new file mode 100644 index 0000000000..6a2efd0e38 --- /dev/null +++ b/integrations/gitlab/tests/gitlab_integration/test_gitlab_service_folder.py @@ -0,0 +1,146 @@ +from typing import Any, List, Optional +from unittest.mock import MagicMock +import pytest +from gitlab_integration.gitlab_service import GitlabService + + +# Mock function simulating repository tree retrieval with pagination and path filtering +def mock_repository_tree( + path: str, page: int, *args: Any, **kwargs: Any +) -> Optional[List[dict]]: + if path == "src": + if page == 1: + return [ + { + "id": "fd581c619bf59cfdfa9c8282377bb09c2f897520", + "name": "markdown", + "type": "tree", + "path": "src/markdown", + "mode": "040000", + } + ] + elif page == 2: + return [ + { + "id": "23ea4d11a4bdd960ee5320c5cb65b5b3fdbc60db", + "name": "ruby", + "type": "tree", + "path": "src/ruby", + "mode": "040000", + }, + { + "id": "e7e3e4c1b7a0a0d1e0c1f4e0a0d1e0c1f4e0a0d", + "name": "gitlab_ci.yml", + "type": "blob", + "path": "src/python", + "mode": "040000", + }, + ] + else: + return [] # No more pages + elif path == "files": + if page == 1: + return [ + { + "id": "4535904260b1082e14f867f7a24fd8c21495bde3", + "name": "images", + "type": "tree", + "path": "files/images", + "mode": "040000", + } + ] + else: + return [] # No more pages + else: + return [] # Path not found + + +@pytest.mark.asyncio +async def test_get_all_folders_in_project_path_with_folders( + monkeypatch: Any, mocked_gitlab_service: GitlabService +) -> None: + # Arrange + mock_project = MagicMock() + mock_project.path_with_namespace = "namespace/project" + mock_project.default_branch = "main" + mock_project.asdict.return_value = { + "name": "namespace/project", + "default_branch": "main", + } + + mock_folder_selector = MagicMock() + mock_folder_selector.path = "src" + mock_folder_selector.branch = None + + monkeypatch.setattr( + mock_project, "repository_tree", MagicMock(side_effect=mock_repository_tree) + ) + + expected_folders = [ + { + "folder": { + "id": "fd581c619bf59cfdfa9c8282377bb09c2f897520", + "name": "markdown", + "type": "tree", + "path": "src/markdown", + "mode": "040000", + }, + "repo": mock_project.asdict(), + "__branch": "main", + }, + { + "folder": { + "id": "23ea4d11a4bdd960ee5320c5cb65b5b3fdbc60db", + "name": "ruby", + "type": "tree", + "path": "src/ruby", + "mode": "040000", + }, + "repo": mock_project.asdict(), + "__branch": "main", + }, + ] + + # Act + actual_folders = [] + async for folder_batch in mocked_gitlab_service.get_all_folders_in_project_path( + mock_project, mock_folder_selector + ): + actual_folders.extend(folder_batch) + + # Assert + assert actual_folders == expected_folders + + +@pytest.mark.asyncio +async def test_get_all_folders_in_project_path_no_folders( + monkeypatch: Any, mocked_gitlab_service: GitlabService +) -> None: + # Arrange + mock_project = MagicMock() + mock_project.path_with_namespace = "namespace/project" + mock_project.default_branch = "main" + mock_project.asdict.return_value = { + "name": "namespace/project", + "default_branch": "main", + } + + mock_folder_selector = MagicMock() + mock_folder_selector.path = "non_existing_path" # No folders exist here + mock_folder_selector.branch = None + + monkeypatch.setattr( + mock_project, "repository_tree", MagicMock(side_effect=mock_repository_tree) + ) + + expected_folders: list[dict[str, Any]] = [] + + # Act + actual_folders = [] + async for folder_batch in mocked_gitlab_service.get_all_folders_in_project_path( + mock_project, mock_folder_selector + ): + actual_folders.extend(folder_batch) + + # Assert + assert actual_folders == expected_folders