From d307874fc9145702ca461fe77374e2beb620ffb7 Mon Sep 17 00:00:00 2001 From: James B Date: Fri, 13 Oct 2023 11:36:15 +0100 Subject: [PATCH] New "geo" optional dependencies https://github.com/OpenDataServices/flatten-tool/issues/424 https://github.com/OpenDataServices/flatten-tool/pull/433 --- .github/workflows/test.yml | 16 ++--- CHANGELOG.md | 7 ++ flattentool/input.py | 39 +++++++---- flattentool/json_input.py | 29 +++++--- flattentool/tests/test_docs.py | 22 +++++- .../tests/test_input_SpreadsheetInput.py | 1 + .../test_input_SpreadsheetInput_unflatten.py | 68 +++++++++++++++++++ flattentool/tests/test_json_input.py | 2 + pyproject.toml | 4 ++ setup.py | 4 +- 10 files changed, 155 insertions(+), 37 deletions(-) create mode 100644 pyproject.toml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 388f8a3..47c8af9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,15 +15,15 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: x64 - - uses: actions/cache@v1 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements_dev.txt') }}-${{ matrix.python-version }} - restore-keys: | - ${{ runner.os }}-pip- - - run: pip install --upgrade -r requirements_dev.txt + - name: Install normal dependencies + run: pip install -r requirements_dev.txt - run: pip install 'jsonref${{ matrix.jsonref-version }}' - - run: py.test --cov . + - name: Test with normal dependencies + run: py.test -m "not geo" --cov . + - name: Install geo dependencies + run: pip install -e .[geo] + - name: Test with geo dependencies + run: py.test -m "geo" --cov --cov-append . - env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: coveralls --service=github diff --git a/CHANGELOG.md b/CHANGELOG.md index 98cc978..dabd8a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [Unreleased] +### Changed + +- New "geo" optional Python dependency group; some existing Python dependencies have moved into it. + If you were using this functionality before, you'll need to install the "geo" extra to get the same behaviour. + https://github.com/OpenDataServices/flatten-tool/issues/424 + https://github.com/OpenDataServices/flatten-tool/pull/433 + ## [0.23.0] - 2023-08-30 ### Changed diff --git a/flattentool/input.py b/flattentool/input.py index f3e2e24..cc9137a 100644 --- a/flattentool/input.py +++ b/flattentool/input.py @@ -13,10 +13,17 @@ from decimal import Decimal, InvalidOperation from warnings import warn -import geojson import openpyxl import pytz -import shapely.wkt + +try: + import geojson + import shapely.wkt + + SHAPELY_AND_GEOJSON_LIBRARIES_AVAILABLE = True +except ImportError: + SHAPELY_AND_GEOJSON_LIBRARIES_AVAILABLE = False + from openpyxl.utils.cell import _get_column_letter from flattentool.exceptions import DataErrorWarning @@ -106,18 +113,22 @@ def convert_type(type_string, value, timezone=pytz.timezone("UTC"), convert_flag return value.date().isoformat() return str(value) elif convert_flags.get("wkt") and type_string == "geojson": - try: - geom = shapely.wkt.loads(value) - except shapely.errors.GEOSException as e: - warn( - _( - 'An invalid WKT string was supplied "{value}", the message from the parser was: {parser_msg}' - ).format(value=value, parser_msg=str(e)), - DataErrorWarning, - ) - return - feature = geojson.Feature(geometry=geom, properties={}) - return feature.geometry + if SHAPELY_AND_GEOJSON_LIBRARIES_AVAILABLE: + try: + geom = shapely.wkt.loads(value) + except shapely.errors.GEOSException as e: + warn( + _( + 'An invalid WKT string was supplied "{value}", the message from the parser was: {parser_msg}' + ).format(value=value, parser_msg=str(e)), + DataErrorWarning, + ) + return + feature = geojson.Feature(geometry=geom, properties={}) + return feature.geometry + else: + warn("Install flattentool's optional geo dependencies to 
use geo features.") + return str(value) elif type_string == "": if type(value) == datetime.datetime: return timezone.localize(value).isoformat() diff --git a/flattentool/json_input.py b/flattentool/json_input.py index 505aa0d..ab28ec7 100644 --- a/flattentool/json_input.py +++ b/flattentool/json_input.py @@ -16,7 +16,13 @@ import BTrees.OOBTree import ijson -import shapely.geometry + +try: + import shapely.wkt + + SHAPELY_LIBRARY_AVAILABLE = True +except ImportError: + SHAPELY_LIBRARY_AVAILABLE = False import transaction import xmltodict import zc.zlibstorage @@ -363,14 +369,19 @@ def parse_json_dict( and "type" in json_dict and "coordinates" in json_dict ): - _sheet_key = sheet_key(sheet, parent_name.strip("/")) - try: - geom = shapely.geometry.shape(json_dict) - except (shapely.errors.GeometryTypeError, TypeError, ValueError) as e: - warn(_("Invalid GeoJSON: {parser_msg}").format(parser_msg=repr(e))) - return - flattened_dict[_sheet_key] = geom.wkt - skip_type_and_coordinates = True + if SHAPELY_LIBRARY_AVAILABLE: + _sheet_key = sheet_key(sheet, parent_name.strip("/")) + try: + geom = shapely.geometry.shape(json_dict) + except (shapely.errors.GeometryTypeError, TypeError, ValueError) as e: + warn(_("Invalid GeoJSON: {parser_msg}").format(parser_msg=repr(e))) + return + flattened_dict[_sheet_key] = geom.wkt + skip_type_and_coordinates = True + else: + warn( + "Install flattentool's optional geo dependencies to use geo features." 
+ ) parent_id_fields = copy.copy(parent_id_fields) or OrderedDict() if flattened_dict is None: diff --git a/flattentool/tests/test_docs.py b/flattentool/tests/test_docs.py index 5c4aef9..13793b8 100644 --- a/flattentool/tests/test_docs.py +++ b/flattentool/tests/test_docs.py @@ -8,11 +8,13 @@ import pytest examples_in_docs_data = [] +examples_in_docs_data_geo = [] def _get_examples_in_docs_data(): - global examples_in_docs_data + global examples_in_docs_data, examples_in_docs_data_geo examples_in_docs_data = [] + examples_in_docs_data_geo = [] for root, dirs, files in os.walk("examples"): for filename in files: if root.startswith("examples/help/") and sys.version_info[:2] != (3, 8): @@ -21,7 +23,10 @@ def _get_examples_in_docs_data(): # (Same as we lint code with, so dev's can have one virtual env) continue if "cmd.txt" in filename: - examples_in_docs_data.append((root, filename)) + if root.startswith("examples/wkt"): + examples_in_docs_data_geo.append((root, filename)) + else: + examples_in_docs_data.append((root, filename)) _get_examples_in_docs_data() @@ -45,6 +50,17 @@ def test_examples_receipt(): @pytest.mark.parametrize("root, filename", examples_in_docs_data) def test_example_in_doc(root, filename): + _test_example_in_doc_worker(root, filename) + + +@pytest.mark.parametrize("root, filename", examples_in_docs_data_geo) +@pytest.mark.geo +def test_example_in_doc_geo(root, filename): + _test_example_in_doc_worker(root, filename) + + +@pytest.mark.parametrize("root, filename", examples_in_docs_data) +def _test_example_in_doc_worker(root, filename): if os.path.exists(join(root, "actual")) and os.path.isdir(join(root, "actual")): os.rename(join(root, "actual"), join(root, "actual." 
+ str(uuid.uuid4()))) os.mkdir(join(root, "actual")) @@ -141,7 +157,7 @@ def test_expected_number_of_examples_in_docs_data(): if sys.version_info[:2] != (3, 8): expected -= 3 # number of help tests - assert len(examples_in_docs_data) == expected + assert len(examples_in_docs_data) + len(examples_in_docs_data_geo) == expected def _simplify_warnings(lines): diff --git a/flattentool/tests/test_input_SpreadsheetInput.py b/flattentool/tests/test_input_SpreadsheetInput.py index 1f46d32..1a27986 100644 --- a/flattentool/tests/test_input_SpreadsheetInput.py +++ b/flattentool/tests/test_input_SpreadsheetInput.py @@ -533,6 +533,7 @@ def __str__(self): assert len(recwarn) == 0 +@pytest.mark.geo def test_convert_type_geojson(recwarn): assert convert_type( "geojson", "POINT (53.486434 -2.239353)", convert_flags={"wkt": True} diff --git a/flattentool/tests/test_input_SpreadsheetInput_unflatten.py b/flattentool/tests/test_input_SpreadsheetInput_unflatten.py index ed52b13..fc5cb97 100644 --- a/flattentool/tests/test_input_SpreadsheetInput_unflatten.py +++ b/flattentool/tests/test_input_SpreadsheetInput_unflatten.py @@ -527,6 +527,9 @@ def inject_root_id(root_id, d): False, True, ), +] + +testdata_geo = [ ( "WKT->geojson conversion", [ @@ -1095,6 +1098,7 @@ def create_schema(root_id): ), ] + ROOT_ID_PARAMS = [ ("ocid", {}), # If not root_id kwarg is passed, then a root_id of ocid is assumed ("ocid", {"root_id": "ocid"}), @@ -1124,6 +1128,70 @@ def test_unflatten( warning_messages, reversible, works_without_schema, +): + _test_unflatten_worker( + convert_titles, + use_schema, + root_id, + root_id_kwargs, + input_list, + expected_output_list, + recwarn, + comment, + warning_messages, + reversible, + works_without_schema, + ) + + +@pytest.mark.parametrize("convert_titles", [True, False]) +@pytest.mark.parametrize("use_schema", [True, False]) +@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS) +@pytest.mark.parametrize( + 
"comment,input_list,expected_output_list,warning_messages,reversible,works_without_schema", + testdata_geo, +) +@pytest.mark.geo +def test_unflatten_geo( + convert_titles, + use_schema, + root_id, + root_id_kwargs, + input_list, + expected_output_list, + recwarn, + comment, + warning_messages, + reversible, + works_without_schema, +): + _test_unflatten_worker( + convert_titles, + use_schema, + root_id, + root_id_kwargs, + input_list, + expected_output_list, + recwarn, + comment, + warning_messages, + reversible, + works_without_schema, + ) + + +def _test_unflatten_worker( + convert_titles, + use_schema, + root_id, + root_id_kwargs, + input_list, + expected_output_list, + recwarn, + comment, + warning_messages, + reversible, + works_without_schema, ): if not use_schema and not works_without_schema: pytest.skip() diff --git a/flattentool/tests/test_json_input.py b/flattentool/tests/test_json_input.py index b530706..a98186e 100644 --- a/flattentool/tests/test_json_input.py +++ b/flattentool/tests/test_json_input.py @@ -852,6 +852,7 @@ def test_parse_ids_nested(self): ] +@pytest.mark.geo def test_parse_geojson(): parser = JSONParser( root_json_dict=[ @@ -900,6 +901,7 @@ def test_parse_geojson(): ] +@pytest.mark.geo def test_parse_bad_geojson(recwarn): parser = JSONParser( root_json_dict=[ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1a8479f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +[tool.pytest.ini_options] +markers = [ + "geo: tests that need extra geo dependencies (deselect with '-m \"not geo\"')", +] diff --git a/setup.py b/setup.py index 75f48a2..5ce66f6 100644 --- a/setup.py +++ b/setup.py @@ -39,8 +39,6 @@ def run(self): "zodb", "zc.zlibstorage", "ijson", - "shapely", - "geojson", ] setup( @@ -54,7 +52,7 @@ def run(self): license="MIT", description="Tools for generating CSV and other flat versions of the structured data", install_requires=install_requires, - extras_require={"HTTP": ["requests"]}, + 
extras_require={"HTTP": ["requests"], "geo": ["shapely", "geojson"]}, cmdclass={ "install": InstallWithCompile, "develop": DevelopWithCompile,