Skip to content

Commit

Permalink
Add code
Browse files Browse the repository at this point in the history
  • Loading branch information
Misc authored and odscjames committed May 9, 2023
1 parent 6744fcd commit fad7f5b
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 5 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Lint workflow: runs the import sorter, formatter, linter and type checker
# over the libcove2 package on every push and pull request.
name: Lint
on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Setup python
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML never reads a version such as 3.10 as the float 3.1.
          python-version: "3.9"
          architecture: x64
      - uses: actions/cache@v1
        with:
          path: ~/.cache/pip
          # Dev dependencies are declared in setup.py (extras_require["dev"]),
          # so hash that file. This job defines no build matrix, so the key
          # must not reference the matrix context.
          key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - run: pip install -e .[dev]
      - run: isort --check-only libcove2 setup.py
      - run: black --check libcove2 setup.py
      - run: flake8 libcove2 setup.py
      - run: mypy --install-types --non-interactive -p libcove2
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
__pycache__
/dist
18 changes: 18 additions & 0 deletions LICENSE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Cove - COnvert Validate & Explore
=================================

Cove - COnvert Validate & Explore is free software designed to help people check data
published to various different data standards.

Cove - COnvert Validate & Explore is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Cove - COnvert Validate & Explore is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with Cove - COnvert Validate & Explore. If not, see <http://www.gnu.org/licenses/>.
145 changes: 145 additions & 0 deletions libcove2/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import datetime
import json
import os
import re
import shutil
from tempfile import NamedTemporaryFile

import requests

# Matches a whole name made of any prefix, an underscore and a trailing
# language tag, e.g. "title_fr" or "title_en-GB".
# NOTE(review): the tag grammar looks like BCP 47 (language, script, region,
# variants, extensions, private use) - confirm against the intended spec.
# get_additional_fields_info() applies it to the last path segment of a field
# when its `fields_regex` flag is set.
LANGUAGE_RE = re.compile(
    "^(.*_(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)))$"  # noqa
)


def schema_dict_fields_generator(schema_dict):
    """Yield the "/"-separated path of every field declared in a schema.

    Walks the "properties" of `schema_dict`, descending into nested
    "properties", into array "items", and into each alternative listed under
    "oneOf". Nested paths are yielded before the path of the property that
    contains them, and a property with several "oneOf" alternatives is yielded
    once per alternative, so callers that need unique paths should collect
    the result into a set.

    Anything that is not a dict (for example the boolean schemas `true` and
    `false`, or a list of tuple-style "items") declares no fields and is
    skipped instead of raising TypeError.
    """
    if not isinstance(schema_dict, dict):
        return

    properties = schema_dict.get("properties")
    if isinstance(properties, dict):
        for property_name, value in properties.items():
            # A property may offer several alternative schemas via "oneOf";
            # otherwise the property value itself is the only candidate.
            if isinstance(value, dict) and isinstance(value.get("oneOf"), list):
                property_schema_dicts = value["oneOf"]
            else:
                property_schema_dicts = [value]

            for property_schema_dict in property_schema_dicts:
                if not isinstance(property_schema_dict, dict):
                    continue
                # "properties" takes precedence over "items", as in an
                # if/elif chain: only one of the two is descended into.
                if "properties" in property_schema_dict:
                    nested_schema = property_schema_dict
                elif "items" in property_schema_dict:
                    nested_schema = property_schema_dict["items"]
                else:
                    nested_schema = None
                if nested_schema is not None:
                    for field in schema_dict_fields_generator(nested_schema):
                        yield f"/{property_name}{field}"
                yield f"/{property_name}"

    # An array schema whose items are a "oneOf" contributes the fields of
    # every alternative, without adding a path segment of its own.
    items = schema_dict.get("items")
    if isinstance(items, dict) and isinstance(items.get("oneOf"), list):
        for one_of in items["oneOf"]:
            yield from schema_dict_fields_generator(one_of)


def get_additional_fields_info(json_data, schema_fields, fields_regex=False):
    """Describe the fields present in `json_data` that are not in the schema.

    `schema_fields` is a container of "/"-separated field paths that supports
    `in` (presumably the output of schema_dict_fields_generator - confirm
    against the caller). When `fields_regex` is true, fields whose last path
    segment matches LANGUAGE_RE are not reported.

    Returns a dict keyed by field path. Each value is the info dict built by
    get_fields_present_with_examples ("count" and "examples") extended with:

    * "root_additional_field": True when no ancestor of the field is itself
      an additional field, False otherwise;
    * "additional_field_descendance": only on root additional fields; maps
      the path of each additional descendant to that descendant's info dict;
    * "path": the field path without its last segment;
    * "field_name": the last segment of the field path.
    """
    fields_present = get_fields_present_with_examples(json_data)

    additional_fields = {}
    root_additional_fields = set()

    for field, field_info in fields_present.items():
        if field in schema_fields:
            continue
        path, _, field_name = field.rpartition("/")
        if fields_regex and LANGUAGE_RE.search(field_name):
            continue

        for root_additional_field in root_additional_fields:
            # Compare on a whole path segment: "/foobar" is a sibling of
            # "/foo", not one of its descendants.
            if field.startswith(f"{root_additional_field}/"):
                field_info["root_additional_field"] = False
                additional_fields[root_additional_field][
                    "additional_field_descendance"
                ][field] = field_info
                break
        else:
            # No known root is an ancestor, so this field starts a new tree.
            field_info["root_additional_field"] = True
            field_info["additional_field_descendance"] = {}
            root_additional_fields.add(field)

        field_info["path"] = path
        field_info["field_name"] = field_name
        additional_fields[field] = field_info

    return additional_fields


def get_fields_present_with_examples(*args, **kwargs):
    """Count every field path found in the data and keep a few sample values.

    All arguments are forwarded to fields_present_generator. The result maps
    each field path to {"count": <occurrences>, "examples": [...]}, where the
    examples list holds at most three values and never contains a list or a
    dict.
    """
    summary = {}
    for path, value in fields_present_generator(*args, **kwargs):
        entry = summary.setdefault(path, {"count": 0, "examples": []})
        entry["count"] += 1
        samples = entry["examples"]
        if len(samples) < 3 and not isinstance(value, (list, dict)):
            samples.append(value)

    return summary


def fields_present_generator(json_data, prefix=""):
    """Yield a (path, value) pair for every dict key found in `json_data`.

    Paths are built by joining keys with "/" beneath `prefix`. Lists add no
    path segment of their own: the dicts they contain are walked under the
    list's path, and any other list element (scalars, nested lists) is
    ignored. Each key is yielded before the keys nested beneath it.
    """
    if isinstance(json_data, list):
        for element in json_data:
            if isinstance(element, dict):
                yield from fields_present_generator(element, prefix)
        return

    if not isinstance(json_data, dict):
        return

    for name, child in json_data.items():
        path = f"{prefix}/{name}"
        yield path, child
        if isinstance(child, (dict, list)):
            yield from fields_present_generator(child, path)


def org_id_file_fresh(org_id_file_contents, check_date):
    """Return True when the org-ids data was downloaded on or after `check_date`.

    The download day is read from the "downloaded" entry of
    `org_id_file_contents` (format "%Y-%m-%d"). Data without that entry is
    treated as downloaded on 2000-01-01.
    """
    downloaded_text = org_id_file_contents.get("downloaded", "2000-1-1")
    downloaded_on = datetime.datetime.strptime(downloaded_text, "%Y-%m-%d").date()
    return downloaded_on >= check_date


def get_orgids_prefixes(orgids_url=None, timeout=30):
    """Return the list of org-id prefix codes, using a daily local cache.

    The data is read from "org-ids.json" next to this module. If that file is
    missing, unreadable as JSON, or was not downloaded today, it is fetched
    again from `orgids_url` (default "http://org-id.guide/download.json"),
    stamped with today's date under "downloaded" and written back to the
    cache file.

    `timeout` is the number of seconds to wait for the remote server, so a
    stalled connection cannot block the caller indefinitely.

    Returns the "code" of every entry in the data's "lists".

    Raises requests.exceptions.RequestException when a refresh is needed and
    the download fails (connection error, timeout or HTTP error status).
    """
    local_org_ids_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "org-ids.json"
    )
    today = datetime.date.today()
    if orgids_url is None:
        orgids_url = "http://org-id.guide/download.json"
    org_id_file_contents = None

    # Try to grab the data from the local filesystem. A missing or corrupt
    # cache file is not fatal: it just forces a fresh download below.
    try:
        with open(local_org_ids_file, encoding="utf-8") as fp:
            org_id_file_contents = json.load(fp)
    except (FileNotFoundError, json.JSONDecodeError):
        pass

    if org_id_file_contents is None or not org_id_file_fresh(
        org_id_file_contents, today
    ):
        # Refresh the file. Any failure propagates to the caller as a
        # requests.exceptions.RequestException.
        response = requests.get(orgids_url, timeout=timeout)
        response.raise_for_status()
        org_id_file_contents = response.json()

        org_id_file_contents["downloaded"] = today.isoformat()
        # Write to a temp file in the destination directory and rename it
        # into place: a rename within one filesystem is atomic, so readers
        # never observe a partially written cache file.
        with NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            dir=os.path.dirname(local_org_ids_file),
            delete=False,
        ) as tmp:
            json.dump(org_id_file_contents, tmp, indent=2)
        try:
            os.replace(tmp.name, local_org_ids_file)
        except OSError:
            # Do not leave the orphaned temp file behind.
            os.unlink(tmp.name)
            raise
    # Return either the original file data, if it was found to be fresh,
    # or the new data, if we were able to retrieve it.
    return [org_list["code"] for org_list in org_id_file_contents["lists"]]
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tool.isort]
profile = "black"
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[flake8]
max-line-length = 88
extend-ignore = E203
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
packages=find_packages(),
long_description="A data review library",
python_requires=">=3.8",
install_requires=[
],
classifiers=[
],
install_requires=["requests"],
classifiers=[],
extras_require={
"dev": ["black", "isort", "flake8", "mypy"],
},
)

0 comments on commit fad7f5b

Please sign in to comment.