From 62162a102a5241a0ce9c6671f3a1a44fd52bd40b Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Thu, 16 Mar 2023 14:46:23 -0400 Subject: [PATCH 1/8] fix: Use shutil.move instead of os.rename https://github.com/OpenDataServices/lib-cove/issues/84 Original pull request: https://github.com/OpenDataServices/lib-cove/pull/115 Ben has edited the changelog only. Co-authored-by: James McKinney <26463+jpmckinney@users.noreply.github.com> Co-authored-by: Ben Webb --- CHANGELOG.md | 4 ++++ libcove/lib/common.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c46965f..42b00ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Fixed + +- Fix crash when tmp directory is on different filesystem https://github.com/OpenDataServices/lib-cove/issues/84 + ## Changed ## [0.30.0] - 2023-03-10 diff --git a/libcove/lib/common.py b/libcove/lib/common.py index dbec286..1181008 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -8,6 +8,7 @@ import numbers import os import re +import shutil from tempfile import NamedTemporaryFile from urllib.parse import urljoin, urlparse, urlsplit from urllib.request import urlopen @@ -1445,7 +1446,7 @@ def get_orgids_prefixes(orgids_url=None): # Use a tempfile and move to create new file here for atomicity with NamedTemporaryFile(mode="w", delete=False) as tmp: json.dump(org_id_file_contents, tmp, indent=2) - os.rename(tmp.name, local_org_ids_file) + shutil.move(tmp.name, local_org_ids_file) # Return either the original file data, if it was found to be fresh, or the new data, if we were able to retrieve it. 
return [org_list["code"] for org_list in org_id_file_contents["lists"]] From ec4d5ee8027b350741fe06e98769cc70a4a170ba Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Thu, 16 Mar 2023 14:46:03 -0400 Subject: [PATCH 2/8] fix: Use sentence case for "Array element" https://github.com/OpenDataServices/lib-cove/issues/28 Original pull request: https://github.com/OpenDataServices/lib-cove/pull/116 Ben has edited the changelog only. Co-authored-by: James McKinney <26463+jpmckinney@users.noreply.github.com> Co-authored-by: Ben Webb --- CHANGELOG.md | 1 + libcove/lib/common.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42b00ff..4686701 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed - Fix crash when tmp directory is on different filesystem https://github.com/OpenDataServices/lib-cove/issues/84 +- Use sentence case consistently in validation error messages https://github.com/OpenDataServices/lib-cove/issues/28 ## Changed diff --git a/libcove/lib/common.py b/libcove/lib/common.py index 1181008..5a717e6 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -888,7 +888,7 @@ def get_schema_validation_errors( header = e.path[-1] if isinstance(e.path[-1], int) and len(e.path) >= 2: # We're dealing with elements in an array of items at this point - pre_header = "Array Element " + pre_header = "Array element " header_extra = "{}/[number]".format(e.path[-2]) null_clause = "" From cf97ed88977b8a3a597f11b0bb89d1ec94b86910 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Thu, 16 Mar 2023 14:45:37 -0400 Subject: [PATCH 3/8] fix: Force jsonschema to use our validator https://github.com/OpenDataServices/lib-cove/pull/118 Ben has edited the changelog only. 
Co-authored-by: James McKinney <26463+jpmckinney@users.noreply.github.com> Co-authored-by: Ben Webb --- CHANGELOG.md | 2 +- libcove/lib/common.py | 17 +++++++++++++---- tests/lib/test_common.py | 6 ------ tests/lib/test_converters.py | 5 ----- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4686701..5824f51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Fix crash when tmp directory is on different filesystem https://github.com/OpenDataServices/lib-cove/issues/84 - Use sentence case consistently in validation error messages https://github.com/OpenDataServices/lib-cove/issues/28 +- Support jsonschema>=4.10 https://github.com/OpenDataServices/lib-cove/pull/118 ## Changed @@ -19,7 +20,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Allow jsonschema version 4. - Support arrays of strings that must be on a codelist https://github.com/ThreeSixtyGiving/dataquality/issues/80 - ## [0.29.0] - 2022-12-14 - Add `SchemaJsonMixin.process_codelists` (previously only existed in lib-cove-ocds, will be used by 360 CoVE) https://github.com/OpenDataServices/lib-cove/pull/109 diff --git a/libcove/lib/common.py b/libcove/lib/common.py index 5a717e6..4465342 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -721,11 +721,9 @@ def get_additional_codelist_values(schema_obj, json_data): path_string = "/".join(path_no_num) if path_string not in additional_codelist_values: - codelist_url = schema_obj.codelists + codelist codelist_amend_urls = [] if hasattr(schema_obj, "extended_codelist_urls"): - # Replace URL if this codelist is overridden by an extension. # Last one to be applied wins. 
if schema_obj.extended_codelist_urls.get(codelist): @@ -772,7 +770,6 @@ def get_additional_fields_info(json_data, schema_fields, context, fields_regex=F root_additional_fields = set() for field, field_info in fields_present.items(): - if field in schema_fields: continue if fields_regex and LANGUAGE_RE.search(field.split("/")[-1]): @@ -805,7 +802,6 @@ def get_counts_additional_fields( fields_regex=False, additional_fields_info=None, ): - if not additional_fields_info: schema_fields = schema_obj.get_pkg_schema_fields() additional_fields_info = get_additional_fields_info( @@ -851,6 +847,12 @@ def get_schema_validation_errors( schema_url=schema_obj.schema_host, ) + # Force jsonschema to use our validator. + # https://github.com/python-jsonschema/jsonschema/issues/994 + jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( + validator + ) + our_validator = validator( pkg_schema_obj, format_checker=format_checker, resolver=resolver ) @@ -1001,6 +1003,13 @@ def get_schema_validation_errors( validation_errors[ json.dumps(unique_validator_key, default=decimal_default) ].append(value) + + # Restore jsonschema's default validator, to not interfere with other software. 
+ # https://github.com/python-jsonschema/jsonschema/issues/994 + jsonschema.validators.validates("http://json-schema.org/draft-04/schema#")( + jsonschema.validators.Draft4Validator + ) + return dict(validation_errors) diff --git a/tests/lib/test_common.py b/tests/lib/test_common.py index 6296933..fd4ae36 100644 --- a/tests/lib/test_common.py +++ b/tests/lib/test_common.py @@ -333,7 +333,6 @@ def test_get_schema_deprecated_paths(): def test_schema_dict_fields_generator_release_schema_deprecated_fields(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -362,7 +361,6 @@ def test_schema_dict_fields_generator_release_schema_deprecated_fields(): def test_schema_dict_fields_generator_schema_with_list_and_oneof(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -396,7 +394,6 @@ def test_schema_dict_fields_generator_schema_with_list_and_oneof(): def test_fields_present_generator_tenders_releases_2_releases(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -449,7 +446,6 @@ def test_fields_present_generator_tenders_releases_2_releases(): def test_fields_present_generator_data_root_is_list(): - with open( os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -501,7 +497,6 @@ def test_fields_present_generator_data_root_is_list(): def test_get_additional_fields_info(): - simple_data = { "non_additional_field": "a", "non_additional_list": [1, 2], @@ -1227,7 +1222,6 @@ def test_get_field_coverage_oc4ids(): ), ) def test_oneOfEnumSelectorField(data, count, errors): - with open(common_fixtures("schema_with_one_of_enum_selector_field.json")) as fp: schema = json.load(fp) diff --git a/tests/lib/test_converters.py b/tests/lib/test_converters.py index 11a8669..4660301 100644 --- a/tests/lib/test_converters.py +++ b/tests/lib/test_converters.py @@ -8,7 +8,6 @@ def test_convert_json_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() ) @@ -55,7 +54,6 
@@ def test_convert_json_1(): def test_convert_activity_xml_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-iati-tests-", dir=tempfile.gettempdir() ) @@ -110,7 +108,6 @@ def test_convert_activity_xml_1(): def test_convert_org_xml_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-iati-tests-", dir=tempfile.gettempdir() ) @@ -166,7 +163,6 @@ def test_convert_org_xml_1(): def test_convert_json_root_is_list_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() ) @@ -214,7 +210,6 @@ def test_convert_json_root_is_list_1(): def test_convert_csv_1(): - cove_temp_folder = tempfile.mkdtemp( prefix="lib-cove-ocds-tests-", dir=tempfile.gettempdir() ) From 98f79e7e5cfdcfd2889ed2765ee0abb966da252e Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Thu, 6 Jul 2023 15:36:10 +0100 Subject: [PATCH 4/8] changelog: Fix badly formatted/ordered headings from previous releases --- CHANGELOG.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5824f51..dfa5bee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,15 +13,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Use sentence case consistently in validation error messages https://github.com/OpenDataServices/lib-cove/issues/28 - Support jsonschema>=4.10 https://github.com/OpenDataServices/lib-cove/pull/118 -## Changed - ## [0.30.0] - 2023-03-10 +### Changed + - Allow jsonschema version 4. - Support arrays of strings that must be on a codelist https://github.com/ThreeSixtyGiving/dataquality/issues/80 ## [0.29.0] - 2022-12-14 +### Changed + - Add `SchemaJsonMixin.process_codelists` (previously only existed in lib-cove-ocds, will be used by 360 CoVE) https://github.com/OpenDataServices/lib-cove/pull/109 ## [0.28.0] - 2022-11-18 @@ -39,7 +41,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [0.27.0] - 2021-11-02 -## Added +### Added - oneOf validator will read a new "oneOfEnumSelectorField" option in schema and use that to pick subschema. (Previously this worked for "statementType" only, for BODS) @@ -50,25 +52,25 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [0.26.1] - 2021-10-01 -## Changed +### Changed - Lock to jsonschema version 3 (we use internal tools that are not available in V4) ## [0.26.0] - 2021-09-15 -## Changed +### Changed - Various performance improvements https://github.com/open-contracting/lib-cove-oc4ids/issues/23 ## [0.25.0] - 2021-08-18 -## Added +### Added - Add a function to calculate field coverage https://github.com/open-contracting/cove-oc4ids/issues/98 ## [0.24.0] - 2021-05-20 -## Changed +### Changed - Update `unique_ids` override to support multiple ids. If you called `unique_ids` with `id_name="some_id"`, you now need to call `id_names=["some_id"]`. See this lib-cove-ocds PR as an example: https://github.com/open-contracting/lib-cove-ocds/pull/91/files @@ -82,7 +84,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Drop Python 3.5 support https://github.com/OpenDataServices/lib-cove/pull/81 -## CHanged +### Changed - Remove unused dependencies from setup.py https://github.com/OpenDataServices/lib-cove/pull/80 From b2f72bd267c44732b23243a7ba5b6a809e990a99 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 4 Jul 2023 11:25:49 -0400 Subject: [PATCH 5/8] Eliminate cache size limit https://github.com/OpenDataServices/lib-cove/pull/120 Ben has edited the changelog only. 
Co-authored-by: James McKinney <26463+jpmckinney@users.noreply.github.com> Co-authored-by: Ben Webb --- CHANGELOG.md | 4 ++++ libcove/lib/tools.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa5bee..088b931 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Changed + +- Eliminate size limit on caching requests https://github.com/OpenDataServices/lib-cove/pull/120 + ### Fixed - Fix crash when tmp directory is on different filesystem https://github.com/OpenDataServices/lib-cove/issues/84 diff --git a/libcove/lib/tools.py b/libcove/lib/tools.py index 96975ec..4a62b14 100644 --- a/libcove/lib/tools.py +++ b/libcove/lib/tools.py @@ -6,7 +6,7 @@ from .exceptions import UnrecognisedFileType -@lru_cache(maxsize=64) +@lru_cache(maxsize=None) def cached_get_request(url): return requests.get(url) From 01ac5c79949a56c40ff7fa5adc90f443299c13a1 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 5 Jul 2023 14:50:22 -0400 Subject: [PATCH 6/8] ci: Remove bods integration test, as bods has migrated to lib-cove-2 --- .github/workflows/integration.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 026326b..21907bf 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - cove: [ 'oc4ids' , 'ocds' , 'bods'] + cove: [ 'oc4ids' , 'ocds' ] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v1 @@ -30,15 +30,6 @@ jobs: cd .. git clone https://github.com/open-contracting/lib-cove-ocds.git - - name: bods - if: matrix.cove == 'bods' - run: | - git clone https://github.com/openownership/cove-bods.git - cd cove-bods - git checkout master - cd .. 
- git clone https://github.com/openownership/lib-cove-bods.git - - name: Install run: | pip install -r cove-${{ matrix.cove }}/requirements.txt From ac70ad43085b6a2227310d7a0b1772019d49f814 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Thu, 6 Jul 2023 15:53:06 +0100 Subject: [PATCH 7/8] ci: Use an older copy of lib-cove-ocds (as main has changes that cove-ocds isn't compatible with yet) --- .github/workflows/integration.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 21907bf..93f34aa 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -29,6 +29,8 @@ jobs: git checkout main cd .. git clone https://github.com/open-contracting/lib-cove-ocds.git + cd lib-cove-ocds + git checkout 0.11.3 - name: Install run: | From 175bb030b37aa684fd7df32357eb1ee69c5d3665 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Thu, 6 Jul 2023 16:19:08 +0100 Subject: [PATCH 8/8] release: 0.31.0 --- CHANGELOG.md | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 088b931..afdcf84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [0.31.0] - 2023-07-06 + ### Changed - Eliminate size limit on caching requests https://github.com/OpenDataServices/lib-cove/pull/120 diff --git a/setup.py b/setup.py index eb0ccdb..1606175 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="libcove", - version="0.30.0", + version="0.31.0", author="Open Data Services", author_email="code@opendataservices.coop", url="https://github.com/OpenDataServices/lib-cove",