-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
199 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
name: Lint
on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Setup python
        uses: actions/setup-python@v2
        with:
          python-version: 3.9
          architecture: x64
      - uses: actions/cache@v1
        with:
          path: ~/.cache/pip
          # FIX: this job defines no strategy.matrix, so the previous
          # "${{ matrix.python-version }}" suffix always expanded to an empty
          # string. Key on the pinned Python version explicitly instead, so
          # the cache is invalidated if the version above ever changes.
          key: ${{ runner.os }}-pip-3.9-${{ hashFiles('**/requirements_dev.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - run: pip install -e .[dev]
      - run: isort --check-only libcove2 setup.py
      - run: black --check libcove2 setup.py
      - run: flake8 libcove2 setup.py
      - run: mypy --install-types --non-interactive -p libcove2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
__pycache__ | ||
/dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
Cove - COnvert Validate & Explore | ||
================================= | ||
|
||
Cove - COnvert Validate & Explore is free software designed to help people check data | ||
published to various different data standards. | ||
|
||
Cove - COnvert Validate & Explore is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU Affero General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
|
||
Cove - COnvert Validate & Explore is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU Affero General Public License for more details. | ||
|
||
You should have received a copy of the GNU Affero General Public License | ||
along with Cove - COnvert Validate & Explore. If not, see <http://www.gnu.org/licenses/>. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
import datetime | ||
import json | ||
import os | ||
import re | ||
import shutil | ||
from tempfile import NamedTemporaryFile | ||
|
||
import requests | ||
|
||
# Matches field names that carry a language suffix, e.g. "title_en" or
# "description_fr-CA": anything ending in "_" followed by a BCP 47 language
# tag (RFC 5646: language[-extlang][-script][-region][-variant][-extension]
# [-privateuse], or a wholly private-use "x-..." tag). Used by
# get_additional_fields_info (fields_regex=True) to avoid reporting
# translated variants of schema fields as additional fields.
LANGUAGE_RE = re.compile(
    "^(.*_(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)))$"  # noqa
)
|
||
|
||
def schema_dict_fields_generator(schema_dict):
    """Yield every field path ("/a/b" style) declared by a JSON schema dict.

    Paths are emitted depth-first: a property's descendants are yielded
    before the property itself. "oneOf" alternatives are walked both for
    individual properties and for array "items".
    """
    properties = schema_dict.get("properties")
    if isinstance(properties, dict):
        for name, subschema in properties.items():
            # A property may offer several alternative sub-schemas via oneOf;
            # otherwise treat the property schema itself as the only option.
            alternatives = subschema["oneOf"] if "oneOf" in subschema else [subschema]
            for alternative in alternatives:
                if not isinstance(alternative, dict):
                    continue
                if "properties" in alternative:
                    for descendant in schema_dict_fields_generator(alternative):
                        yield f"/{name}{descendant}"
                elif "items" in alternative:
                    for descendant in schema_dict_fields_generator(
                        alternative["items"]
                    ):
                        yield f"/{name}{descendant}"
            yield f"/{name}"
    items = schema_dict.get("items")
    if isinstance(items, dict):
        one_of = items.get("oneOf")
        if isinstance(one_of, list):
            for branch in one_of:
                yield from schema_dict_fields_generator(branch)
|
||
|
||
def get_additional_fields_info(json_data, schema_fields, fields_regex=False):
    """Report fields present in ``json_data`` that are absent from the schema.

    Args:
        json_data: Parsed JSON data (dict/list) to scan.
        schema_fields: Container of "/a/b"-style paths the schema declares
            (e.g. from schema_dict_fields_generator).
        fields_regex: When True, fields whose last path segment carries a
            language suffix (LANGUAGE_RE) are not reported as additional.

    Returns:
        Dict mapping each additional field path to its info dict: the
        count/examples from get_fields_present_with_examples plus "path",
        "field_name", "root_additional_field" and, for root additional
        fields, an "additional_field_descendance" dict of their additional
        descendants.
    """
    fields_present = get_fields_present_with_examples(json_data)

    additional_fields = {}
    root_additional_fields = set()

    for field, field_info in fields_present.items():
        if field in schema_fields:
            continue
        if fields_regex and LANGUAGE_RE.search(field.split("/")[-1]):
            continue

        for root_additional_field in root_additional_fields:
            # FIX: require a "/" boundary after the root path, so that e.g.
            # "/abc" is not misclassified as a descendant of an unrelated
            # root "/ab" (plain startswith matched partial segment names).
            if field.startswith(root_additional_field + "/"):
                field_info["root_additional_field"] = False
                additional_fields[root_additional_field][
                    "additional_field_descendance"
                ][field] = field_info
                break
        else:
            # No known additional ancestor: this field is itself a root.
            field_info["root_additional_field"] = True
            field_info["additional_field_descendance"] = {}
            root_additional_fields.add(field)

        field_info["path"] = "/".join(field.split("/")[:-1])
        field_info["field_name"] = field.split("/")[-1]
        additional_fields[field] = field_info

    return additional_fields
|
||
|
||
def get_fields_present_with_examples(*args, **kwargs):
    """Count every field path in the data, keeping up to three scalar examples.

    Arguments are forwarded to fields_present_generator. Returns a dict of
    {field_path: {"count": int, "examples": list}}.
    """
    stats = {}
    for path, value in fields_present_generator(*args, **kwargs):
        entry = stats.get(path)
        if entry is None:
            entry = stats[path] = {"count": 1, "examples": []}
        else:
            entry["count"] += 1
        # Only scalar values make useful examples; cap at three per field.
        if len(entry["examples"]) < 3 and not isinstance(value, (list, dict)):
            entry["examples"].append(value)

    return stats
|
||
|
||
def fields_present_generator(json_data, prefix=""):
    """Recursively yield (path, value) for every key in nested JSON data.

    Paths are "/"-joined chains of dict keys; list items share their
    parent's prefix (list indices never appear in a path), and non-dict
    list items produce nothing.
    """
    if isinstance(json_data, list):
        for element in json_data:
            if isinstance(element, dict):
                yield from fields_present_generator(element, prefix)
        return
    if not isinstance(json_data, dict):
        return
    for key, value in json_data.items():
        path = f"{prefix}/{key}"
        yield path, value
        if isinstance(value, (dict, list)):
            yield from fields_present_generator(value, path)
|
||
|
||
def org_id_file_fresh(org_id_file_contents, check_date):
    """Return True when the org-id file was downloaded on or after check_date.

    The file records its download day under the "downloaded" key as a
    "%Y-%m-%d" string; a missing key defaults to "2000-1-1", which is
    always treated as stale.
    """
    downloaded_str = org_id_file_contents.get("downloaded", "2000-1-1")
    downloaded = datetime.datetime.strptime(downloaded_str, "%Y-%m-%d").date()
    return downloaded >= check_date
|
||
|
||
def get_orgids_prefixes(orgids_url=None):
    """Return the list prefixes ("code" values) from org-ids.json.

    The file is looked up next to this module first; if it is missing, or
    was downloaded before today (see org_id_file_fresh), it is re-fetched
    from ``orgids_url`` (default http://org-id.guide/download.json) and the
    local copy is rewritten atomically.

    Raises:
        requests.exceptions.RequestException: when a refresh is needed but
            the download fails (connection errors, timeouts, HTTP 4xx/5xx).
    """
    local_org_ids_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "org-ids.json"
    )
    today = datetime.date.today()
    if orgids_url is None:
        orgids_url = "http://org-id.guide/download.json"
    org_id_file_contents = None

    # Try to grab the data from the local filesystem
    try:
        with open(local_org_ids_file) as fp:
            org_id_file_contents = json.load(fp)
    except FileNotFoundError:
        pass

    if org_id_file_contents is None or not org_id_file_fresh(
        org_id_file_contents, today
    ):
        # Refresh the file. FIXES: the previous `except ... as e: raise e`
        # was a no-op re-raise, and requests.get had no timeout, so a hung
        # connection could block the caller forever. raise_for_status()
        # turns HTTP error statuses into a RequestException instead of a
        # confusing JSON decode failure below.
        response = requests.get(orgids_url, timeout=60)
        response.raise_for_status()
        org_id_file_contents = response.json()

        org_id_file_contents["downloaded"] = "%s" % today
        # Use a tempfile and move to create new file here for atomicity
        with NamedTemporaryFile(mode="w", delete=False) as tmp:
            json.dump(org_id_file_contents, tmp, indent=2)
        shutil.move(tmp.name, local_org_ids_file)
    # Return either the original file data, if it was found to be fresh,
    # or the new data, if we were able to retrieve it.
    return [org_list["code"] for org_list in org_id_file_contents["lists"]]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[tool.isort] | ||
profile = "black" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[flake8] | ||
max-line-length = 88 | ||
extend-ignore = E203 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters