diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 00000000..c502bd8f
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,7 @@
+# From https://black.readthedocs.io/en/stable/the_black_code_style.html?highlight=isort#how-black-wraps-lines
+[settings]
+multi_line_output=3
+include_trailing_comma=True
+force_grid_wrap=0
+use_parentheses=True
+line_length=88
diff --git a/.travis.yml b/.travis.yml
index ac05f79d..649f81ad 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,5 +8,11 @@ python:
   - "3.8"
 install:
   - pip install --upgrade -r requirements_dev.txt
-script: py.test --cov .
+  # Black only runs under Python >= 3.6
+  - if [[ $TRAVIS_PYTHON_VERSION != 3.5 ]]; then pip install black==19.10b0; fi
+script:
+  - if [[ $TRAVIS_PYTHON_VERSION != 3.5 ]]; then black --check *.py */; fi
+  - isort --check-only --recursive *.py */
+  - flake8
+  - py.test --cov .
 after_success: coveralls
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a09b7647..30e17c7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
+
+### Changed
+
+- All code has had black and isort applied. These have been added to Travis.
+
 ## [0.11.0] - 2020-02-21
 
 ### Added
diff --git a/docs/conf.py b/docs/conf.py
index 59d9beea..c086d4ea 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -33,42 +33,42 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinx.ext.autodoc',
-    'sphinx.ext.doctest',
-    'sphinx.ext.todo',
-    'sphinx.ext.coverage',
-    'sphinx.ext.viewcode',
+    "sphinx.ext.autodoc",
+    "sphinx.ext.doctest",
+    "sphinx.ext.todo",
+    "sphinx.ext.coverage",
+    "sphinx.ext.viewcode",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
 # source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The encoding of source files.
 #
 # source_encoding = 'utf-8-sig'
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 
 # General information about the project.
-project = 'Flatten Tool'
-copyright = '2016-2020, Open Data Services'
-author = 'Open Data Services'
+project = "Flatten Tool"
+copyright = "2016-2020, Open Data Services"
+author = "Open Data Services"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = '0.0.0'
+version = "0.0.0"
 # The full version, including alpha/beta/rc tags.
-release = '0.0.0'
+release = "0.0.0"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -89,7 +89,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
 # The reST default role (used for this markup: `text`) to use for all
 # documents.
@@ -111,7 +111,7 @@
 # show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
 
 # A list of ignored prefixes for module index sorting.
 # modindex_common_prefix = []
@@ -165,7 +165,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied
@@ -245,34 +245,36 @@
 # html_search_scorer = 'scorer.js'
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'FlattenTooldoc'
+htmlhelp_basename = "FlattenTooldoc"
 
 # -- Options for LaTeX output ---------------------------------------------
 
 latex_elements = {
-    # The paper size ('letterpaper' or 'a4paper').
-    #
-    # 'papersize': 'letterpaper',
-
-    # The font size ('10pt', '11pt' or '12pt').
-    #
-    # 'pointsize': '10pt',
-
-    # Additional stuff for the LaTeX preamble.
-    #
-    # 'preamble': '',
-
-    # Latex figure (float) alignment
-    #
-    # 'figure_align': 'htbp',
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'FlattenTool.tex', 'Flatten Tool Documentation',
-     'Open Data Services', 'manual'),
+    (
+        master_doc,
+        "FlattenTool.tex",
+        "Flatten Tool Documentation",
+        "Open Data Services",
+        "manual",
+    ),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -306,10 +308,7 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'flattentool', 'Flatten Tool Documentation',
-     [author], 1)
-]
+man_pages = [(master_doc, "flattentool", "Flatten Tool Documentation", [author], 1)]
 
 # If true, show URL addresses after external links.
 #
@@ -322,9 +321,15 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'FlattenTool', 'Flatten Tool Documentation',
-     author, 'FlattenTool', 'One line description of project.',
-     'Miscellaneous'),
+    (
+        master_doc,
+        "FlattenTool",
+        "Flatten Tool Documentation",
+        author,
+        "FlattenTool",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
 ]
 
 # Documents to append as an appendix to all manuals.
diff --git a/flattentool/ODSReader.py b/flattentool/ODSReader.py
index fdb651bd..abce2d22 100644
--- a/flattentool/ODSReader.py
+++ b/flattentool/ODSReader.py
@@ -15,18 +15,18 @@
 # Thanks to grt for the fixes
 # https://github.com/marcoconti83/read-ods-with-odfpy
 
-import odf.opendocument
-from odf.table import Table, TableRow, TableCell
-from odf.text import P
-from collections import OrderedDict
 import re
+from collections import OrderedDict
+
+import odf.opendocument
+from odf.table import Table, TableCell, TableRow
 
 
 # http://stackoverflow.com/a/4544699/1846474
 class GrowingList(list):
     def __setitem__(self, index, value):
         if index >= len(self):
-            self.extend([None]*(index + 1 - len(self)))
+            self.extend([None] * (index + 1 - len(self)))
         list.__setitem__(self, index, value)
 
 
@@ -49,7 +49,7 @@ def readSheet(self, sheet):
 
         # for each row
         for row in rows:
-            row_comment = ""
+            row_comment = ""  # noqa
             arrCells = GrowingList()
             cells = row.getElementsByType(TableCell)
 
@@ -58,26 +58,38 @@ def readSheet(self, sheet):
             for cell in cells:
                 # repeated value?
                 repeat = cell.getAttribute("numbercolumnsrepeated")
-                if(not repeat):
+                if not repeat:
                     repeat = 1
-                    spanned = int(cell.getAttribute('numbercolumnsspanned') or 0)
+                    spanned = int(cell.getAttribute("numbercolumnsspanned") or 0)
                     # clone spanned cells
                     if self.clonespannedcolumns is not None and spanned > 1:
                         repeat = spanned
 
                 for rr in range(int(repeat)):  # repeated?
                     if str(cell):
-                        value_type = cell.attributes.get(('urn:oasis:names:tc:opendocument:xmlns:office:1.0', 'value-type'))
-                        if value_type == 'float':
-                            if '.' in str(cell):
+                        value_type = cell.attributes.get(
+                            (
+                                "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
+                                "value-type",
+                            )
+                        )
+                        if value_type == "float":
+                            if "." in str(cell):
                                 arrCells[count] = float(str(cell))
                             else:
                                 arrCells[count] = int(str(cell))
-                        elif value_type == 'date':
-                            date_value = cell.attributes.get(('urn:oasis:names:tc:opendocument:xmlns:office:1.0', 'date-value'))
+                        elif value_type == "date":
+                            date_value = cell.attributes.get(
+                                (
+                                    "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
+                                    "date-value",
+                                )
+                            )
                             # Add UTC timezone to naive datetime strings
-                            if re.match(r'^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d$', date_value):
-                                date_value += 'Z'
+                            if re.match(
+                                r"^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d$", date_value
+                            ):
+                                date_value += "Z"
                             arrCells[count] = date_value
                         else:
                             arrCells[count] = str(cell)
diff --git a/flattentool/__init__.py b/flattentool/__init__.py
index 6675c68e..523bfe3f 100644
--- a/flattentool/__init__.py
+++ b/flattentool/__init__.py
@@ -1,20 +1,31 @@
-from flattentool.schema import SchemaParser
+import codecs
+import json
+import sys
+from collections import OrderedDict
+from decimal import Decimal
+
+from flattentool.input import FORMATS as INPUT_FORMATS
 from flattentool.json_input import JSONParser
+from flattentool.lib import parse_sheet_configuration
 from flattentool.output import FORMATS as OUTPUT_FORMATS
 from flattentool.output import FORMATS_SUFFIX
-from flattentool.input import FORMATS as INPUT_FORMATS
+from flattentool.schema import SchemaParser
 from flattentool.xml_output import toxml
-from flattentool.lib import parse_sheet_configuration
-import sys
-import json
-import codecs
-from decimal import Decimal
-from collections import OrderedDict
 
 
-def create_template(schema, output_name=None, output_format='all', main_sheet_name='main',
-                    rollup=False, root_id=None, use_titles=False, disable_local_refs=False, truncation_length=3,
-                    no_deprecated_fields=False, **_):
+def create_template(
+    schema,
+    output_name=None,
+    output_format="all",
+    main_sheet_name="main",
+    rollup=False,
+    root_id=None,
+    use_titles=False,
+    disable_local_refs=False,
+    truncation_length=3,
+    no_deprecated_fields=False,
+    **_
+):
     """
     Creates template file(s) from given inputs
     This function is built to deal with commandline input and arguments
@@ -22,44 +33,71 @@ def create_template(schema, output_name=None, output_format='all', main_sheet_na
     """
 
-    parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles,
-                          disable_local_refs=disable_local_refs, truncation_length=truncation_length,
-                          exclude_deprecated_fields=no_deprecated_fields)
+    parser = SchemaParser(
+        schema_filename=schema,
+        rollup=rollup,
+        root_id=root_id,
+        use_titles=use_titles,
+        disable_local_refs=disable_local_refs,
+        truncation_length=truncation_length,
+        exclude_deprecated_fields=no_deprecated_fields,
+    )
     parser.parse()
 
     def spreadsheet_output(spreadsheet_output_class, name):
         spreadsheet_output = spreadsheet_output_class(
-            parser=parser,
-            main_sheet_name=main_sheet_name,
-            output_name=name)
+            parser=parser, main_sheet_name=main_sheet_name, output_name=name
+        )
        spreadsheet_output.write_sheets()
 
-    if output_format == 'all':
+    if output_format == "all":
         if not output_name:
-            output_name = 'template'
+            output_name = "template"
         for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
-            spreadsheet_output(spreadsheet_output_class, output_name+FORMATS_SUFFIX[format_name])
+            spreadsheet_output(
+                spreadsheet_output_class, output_name + FORMATS_SUFFIX[format_name]
+            )
 
-    elif output_format in OUTPUT_FORMATS.keys(): # in dictionary of allowed formats
+    elif output_format in OUTPUT_FORMATS.keys():  # in dictionary of allowed formats
         if not output_name:
-            output_name = 'template' + FORMATS_SUFFIX[output_format]
+            output_name = "template" + FORMATS_SUFFIX[output_format]
         spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
 
     else:
-        raise Exception('The requested format is not available')
-
-
-def flatten(input_name, schema=None, output_name=None, output_format='all', main_sheet_name='main',
-            root_list_path='main', root_is_list=False, sheet_prefix='', filter_field=None, filter_value=None,
-            preserve_fields=None, rollup=False, root_id=None, use_titles=False, xml=False, id_name='id',
-            disable_local_refs=False, remove_empty_schema_columns=False, truncation_length=3, **_):
+        raise Exception("The requested format is not available")
+
+
+def flatten(
+    input_name,
+    schema=None,
+    output_name=None,
+    output_format="all",
+    main_sheet_name="main",
+    root_list_path="main",
+    root_is_list=False,
+    sheet_prefix="",
+    filter_field=None,
+    filter_value=None,
+    preserve_fields=None,
+    rollup=False,
+    root_id=None,
+    use_titles=False,
+    xml=False,
+    id_name="id",
+    disable_local_refs=False,
+    remove_empty_schema_columns=False,
+    truncation_length=3,
+    **_
+):
     """
     Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
""" - if (filter_field is None and filter_value is not None) or (filter_field is not None and filter_value is None): - raise Exception('You must use filter_field and filter_value together') + if (filter_field is None and filter_value is not None) or ( + filter_field is not None and filter_value is None + ): + raise Exception("You must use filter_field and filter_value together") if schema: schema_parser = SchemaParser( @@ -68,7 +106,8 @@ def flatten(input_name, schema=None, output_name=None, output_format='all', main root_id=root_id, use_titles=use_titles, disable_local_refs=disable_local_refs, - truncation_length=truncation_length) + truncation_length=truncation_length, + ) schema_parser.parse() else: schema_parser = None @@ -86,7 +125,8 @@ def flatten(input_name, schema=None, output_name=None, output_format='all', main filter_value=filter_value, preserve_fields=preserve_fields, remove_empty_schema_columns=remove_empty_schema_columns, - truncation_length=truncation_length) + truncation_length=truncation_length, + ) parser.parse() def spreadsheet_output(spreadsheet_output_class, name): @@ -94,22 +134,25 @@ def spreadsheet_output(spreadsheet_output_class, name): parser=parser, main_sheet_name=main_sheet_name, output_name=name, - sheet_prefix=sheet_prefix) + sheet_prefix=sheet_prefix, + ) spreadsheet_output.write_sheets() - if output_format == 'all': + if output_format == "all": if not output_name: - output_name = 'flattened' + output_name = "flattened" for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items(): - spreadsheet_output(spreadsheet_output_class, output_name+FORMATS_SUFFIX[format_name]) + spreadsheet_output( + spreadsheet_output_class, output_name + FORMATS_SUFFIX[format_name] + ) - elif output_format in OUTPUT_FORMATS.keys(): # in dictionary of allowed formats + elif output_format in OUTPUT_FORMATS.keys(): # in dictionary of allowed formats if not output_name: - output_name = 'flattened' + FORMATS_SUFFIX[output_format] + output_name = "flattened" + FORMATS_SUFFIX[output_format] spreadsheet_output(OUTPUT_FORMATS[output_format], output_name) else: - raise Exception('The requested format is not available') + raise Exception("The requested format is not available") # From http://bugs.python.org/issue16535 @@ -136,29 +179,44 @@ def decimal_default(o): raise TypeError(repr(o) + " is not JSON serializable") -def unflatten(input_name, base_json=None, input_format=None, output_name=None, - root_list_path=None, root_is_list=False, encoding='utf8', timezone_name='UTC', - root_id=None, schema='', convert_titles=False, cell_source_map=None, - heading_source_map=None, id_name=None, xml=False, - vertical_orientation=False, - metatab_name=None, metatab_only=False, metatab_schema='', - metatab_vertical_orientation=False, - xml_schemas=None, - default_configuration='', - disable_local_refs=False, - xml_comment=None, - truncation_length=3, - **_): +def unflatten( + input_name, + base_json=None, + input_format=None, + output_name=None, + root_list_path=None, + root_is_list=False, + encoding="utf8", + timezone_name="UTC", + root_id=None, + schema="", + convert_titles=False, + cell_source_map=None, + heading_source_map=None, + id_name=None, + xml=False, + vertical_orientation=False, + metatab_name=None, + metatab_only=False, + metatab_schema="", + metatab_vertical_orientation=False, + xml_schemas=None, + default_configuration="", + disable_local_refs=False, + xml_comment=None, + truncation_length=3, + **_ +): """ Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested structure 
     (JSON).
     """
     if input_format is None:
-        raise Exception('You must specify an input format (may autodetect in future')
+        raise Exception("You must specify an input format (may autodetect in future)")
     elif input_format not in INPUT_FORMATS:
-        raise Exception('The requested format is not available')
+        raise Exception("The requested format is not available")
 
     if metatab_name and base_json:
-        raise Exception('Not allowed to use base_json with metatab')
+        raise Exception("Not allowed to use base_json with metatab")
 
     if root_is_list:
         base = None
@@ -168,7 +226,6 @@ def unflatten(input_name, base_json=None, input_format=None, output_name=None,
     else:
         base = OrderedDict()
 
-
     base_configuration = parse_sheet_configuration(
         [item.strip() for item in default_configuration.split(",")]
     )
@@ -181,21 +238,27 @@ def unflatten(input_name, base_json=None, input_format=None, output_name=None,
         spreadsheet_input = spreadsheet_input_class(
             input_name=input_name,
             timezone_name=timezone_name,
-            root_list_path='meta',
+            root_list_path="meta",
             include_sheets=[metatab_name],
             convert_titles=convert_titles,
             vertical_orientation=metatab_vertical_orientation,
             id_name=id_name,
             xml=xml,
-            use_configuration=False
+            use_configuration=False,
         )
         if metatab_schema:
-            parser = SchemaParser(schema_filename=metatab_schema, disable_local_refs=disable_local_refs)
+            parser = SchemaParser(
+                schema_filename=metatab_schema, disable_local_refs=disable_local_refs
+            )
             parser.parse()
             spreadsheet_input.parser = parser
         spreadsheet_input.encoding = encoding
         spreadsheet_input.read_sheets()
-        result, cell_source_map_data_meta, heading_source_map_data_meta = spreadsheet_input.fancy_unflatten(
+        (
+            result,
+            cell_source_map_data_meta,
+            heading_source_map_data_meta,
+        ) = spreadsheet_input.fancy_unflatten(
             with_cell_source_map=cell_source_map,
             with_heading_source_map=heading_source_map,
         )
@@ -207,15 +270,17 @@ def unflatten(input_name, base_json=None, input_format=None, output_name=None,
             heading_source_map_data[key[5:]] = value
 
     # update individual keys from base configuration
-    base_configuration.update(spreadsheet_input.sheet_configuration.get(metatab_name, {}))
+    base_configuration.update(
+        spreadsheet_input.sheet_configuration.get(metatab_name, {})
+    )
 
     if result:
         base.update(result[0])
 
     if root_list_path is None:
-        root_list_path = base_configuration.get('RootListPath', 'main')
+        root_list_path = base_configuration.get("RootListPath", "main")
     if id_name is None:
-        id_name = base_configuration.get('IDName', 'id')
+        id_name = base_configuration.get("IDName", "id")
 
     if not metatab_only or root_is_list:
         spreadsheet_input_class = INPUT_FORMATS[input_format]
@@ -230,16 +295,25 @@ def unflatten(input_name, base_json=None, input_format=None, output_name=None,
             vertical_orientation=vertical_orientation,
             id_name=id_name,
             xml=xml,
-            base_configuration=base_configuration
+            base_configuration=base_configuration,
         )
         if schema:
-            parser = SchemaParser(schema_filename=schema, rollup=True, root_id=root_id,
-                                  disable_local_refs=disable_local_refs, truncation_length=truncation_length)
+            parser = SchemaParser(
+                schema_filename=schema,
+                rollup=True,
+                root_id=root_id,
+                disable_local_refs=disable_local_refs,
+                truncation_length=truncation_length,
+            )
             parser.parse()
             spreadsheet_input.parser = parser
         spreadsheet_input.encoding = encoding
         spreadsheet_input.read_sheets()
-        result, cell_source_map_data_main, heading_source_map_data_main = spreadsheet_input.fancy_unflatten(
+        (
+            result,
+            cell_source_map_data_main,
+            heading_source_map_data_main,
+        ) = spreadsheet_input.fancy_unflatten(
             with_cell_source_map=cell_source_map,
             with_heading_source_map=heading_source_map,
         )
@@ -251,23 +325,44 @@ def unflatten(input_name, base_json=None, input_format=None, output_name=None,
         base[root_list_path] = list(result)
 
     if xml:
-        xml_root_tag = base_configuration.get('XMLRootTag', 'iati-activities')
+        xml_root_tag = base_configuration.get("XMLRootTag", "iati-activities")
         xml_output = toxml(
-            base, xml_root_tag, xml_schemas=xml_schemas, root_list_path=root_list_path, xml_comment=xml_comment)
+            base,
+            xml_root_tag,
+            xml_schemas=xml_schemas,
+            root_list_path=root_list_path,
+            xml_comment=xml_comment,
+        )
         if output_name is None:
             sys.stdout.buffer.write(xml_output)
         else:
-            with codecs.open(output_name, 'wb') as fp:
+            with codecs.open(output_name, "wb") as fp:
                 fp.write(xml_output)
     else:
         if output_name is None:
-            print(json.dumps(base, indent=4, default=decimal_default, ensure_ascii=False))
+            print(
+                json.dumps(base, indent=4, default=decimal_default, ensure_ascii=False)
+            )
         else:
-            with codecs.open(output_name, 'w', encoding='utf-8') as fp:
-                json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
+            with codecs.open(output_name, "w", encoding="utf-8") as fp:
+                json.dump(
+                    base, fp, indent=4, default=decimal_default, ensure_ascii=False
+                )
 
     if cell_source_map:
-        with codecs.open(cell_source_map, 'w', encoding='utf-8') as fp:
-            json.dump(cell_source_map_data, fp, indent=4, default=decimal_default, ensure_ascii=False)
+        with codecs.open(cell_source_map, "w", encoding="utf-8") as fp:
+            json.dump(
+                cell_source_map_data,
+                fp,
+                indent=4,
+                default=decimal_default,
+                ensure_ascii=False,
+            )
     if heading_source_map:
-        with codecs.open(heading_source_map, 'w', encoding='utf-8') as fp:
-            json.dump(heading_source_map_data, fp, indent=4, default=decimal_default, ensure_ascii=False)
+        with codecs.open(heading_source_map, "w", encoding="utf-8") as fp:
+            json.dump(
+                heading_source_map_data,
+                fp,
+                indent=4,
+                default=decimal_default,
+                ensure_ascii=False,
+            )
diff --git a/flattentool/cli.py b/flattentool/cli.py
index 9a88ea52..b3ba0124 100644
--- a/flattentool/cli.py
+++ b/flattentool/cli.py
@@ -1,13 +1,14 @@
 from __future__ import print_function
 
 import argparse
-import warnings
 import sys
+import warnings
 
-from flattentool import create_template, unflatten, flatten
+from flattentool import create_template, flatten, unflatten
 from flattentool.input import FORMATS as INPUT_FORMATS
-from flattentool.output import FORMATS as OUTPUT_FORMATS
 from flattentool.json_input import BadlyFormedJSONError
+from flattentool.output import FORMATS as OUTPUT_FORMATS
+
 
 """
 This file does most of the work of the flatten-tool commandline command.
@@ -32,217 +33,261 @@ def create_parser():
     """
     parser = argparse.ArgumentParser()
-    subparsers = parser.add_subparsers(dest='subparser_name')
+    subparsers = parser.add_subparsers(dest="subparser_name")
 
-    output_formats = sorted(OUTPUT_FORMATS) + ['all']
+    output_formats = sorted(OUTPUT_FORMATS) + ["all"]
     input_formats = sorted(INPUT_FORMATS)
 
     parser.add_argument(
-        '-v', '--verbose',
-        action='store_true',
-        help='Print detailed output when warnings or errors occur.')
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Print detailed output when warnings or errors occur.",
+    )
 
     parser_create_template = subparsers.add_parser(
-        'create-template',
-        help='Create a template from the given schema')
+        "create-template", help="Create a template from the given schema"
+    )
     parser_create_template.add_argument(
-        "-s", "--schema",
+        "-s",
+        "--schema",
         help="Path to the schema file you want to use to create the template",
-        required=True)
+        required=True,
+    )
     parser_create_template.add_argument(
-        "-f", "--output-format",
+        "-f",
+        "--output-format",
         help="Type of template you want to create. Defaults to all available options",
-        choices=output_formats)
+        choices=output_formats,
+    )
     parser_create_template.add_argument(
-        "-m", "--main-sheet-name",
-        help="The name of the main sheet, as seen in the first tab of the spreadsheet for example. Defaults to main")
+        "-m",
+        "--main-sheet-name",
+        help="The name of the main sheet, as seen in the first tab of the spreadsheet for example. Defaults to main",
+    )
     parser_create_template.add_argument(
-        "-o", "--output-name",
-        help="Name of the outputted file. Will have an extension appended if format is all.")
+        "-o",
+        "--output-name",
+        help="Name of the outputted file. Will have an extension appended if format is all.",
+    )
     parser_create_template.add_argument(
         "--rollup",
-        action='store_true',
-        help="\"Roll up\" columns from subsheets into the main sheet if they are specified in a rollUp attribute in the schema.")
+        action="store_true",
+        help='"Roll up" columns from subsheets into the main sheet if they are specified in a rollUp attribute in the schema.',
+    )
     parser_create_template.add_argument(
-        "-r", "--root-id",
-        help="Root ID of the data format, e.g. ocid for OCDS")
+        "-r", "--root-id", help="Root ID of the data format, e.g. ocid for OCDS"
+    )
     parser_create_template.add_argument(
-        "--use-titles",
-        action='store_true',
-        help="Convert titles.")
+        "--use-titles", action="store_true", help="Convert titles."
+    )
     parser_create_template.add_argument(
         "--disable-local-refs",
-        action='store_true',
-        help="Disable local refs when parsing JSON Schema.")
+        action="store_true",
+        help="Disable local refs when parsing JSON Schema.",
+    )
     parser_create_template.add_argument(
         "--no-deprecated-fields",
-        action='store_true',
-        help="Exclude Fields marked as deprecated in the JSON Schema.")
+        action="store_true",
+        help="Exclude Fields marked as deprecated in the JSON Schema.",
+    )
     parser_create_template.add_argument(
         "--truncation-length",
-        type=int, default=3,
-        help="The length of components of sub-sheet names (default 3).")
-
-    parser_flatten = subparsers.add_parser(
-        'flatten',
-        help='Flatten a JSON file')
+        type=int,
+        default=3,
+        help="The length of components of sub-sheet names (default 3).",
+    )
+
+    parser_flatten = subparsers.add_parser("flatten", help="Flatten a JSON file")
+    parser_flatten.add_argument("input_name", help="Name of the input JSON file.")
+    parser_flatten.add_argument("-s", "--schema", help="Path to a relevant schema.")
     parser_flatten.add_argument(
-        'input_name',
-        help="Name of the input JSON file.")
-    parser_flatten.add_argument(
-        "-s", "--schema",
-        help="Path to a relevant schema.")
-    parser_flatten.add_argument(
-        "-f", "--output-format",
+        "-f",
+        "--output-format",
         help="Type of template you want to create. Defaults to all available options",
-        choices=output_formats)
+        choices=output_formats,
+    )
     parser_flatten.add_argument(
-        "--xml",
-        action='store_true',
-        help="Use XML as the input format")
+        "--xml", action="store_true", help="Use XML as the input format"
+    )
     parser_flatten.add_argument(
-        "--id-name",
-        help="String to use for the identifier key, defaults to 'id'")
+        "--id-name", help="String to use for the identifier key, defaults to 'id'"
+    )
     parser_flatten.add_argument(
-        "-m", "--main-sheet-name",
-        help="The name of the main sheet, as seen in the first tab of the spreadsheet for example. Defaults to main")
+        "-m",
+        "--main-sheet-name",
+        help="The name of the main sheet, as seen in the first tab of the spreadsheet for example. Defaults to main",
+    )
     parser_flatten.add_argument(
-        "-o", "--output-name",
-        help="Name of the outputted file. Will have an extension appended if format is all.")
+        "-o",
+        "--output-name",
+        help="Name of the outputted file. Will have an extension appended if format is all.",
+    )
     parser_flatten.add_argument(
-        "--root-list-path",
-        help="Path of the root list, defaults to main")
+        "--root-list-path", help="Path of the root list, defaults to main"
+    )
     parser_flatten.add_argument(
         "--rollup",
-        nargs='?',
+        nargs="?",
         const=True,
-        action='append',
-        help="\"Roll up\" columns from subsheets into the main sheet. Pass one or more JSON paths directly, or a file with one JSON path per line, or no value and use a schema containing (a) rollUp attribute(s). Schema takes precedence if both direct input and schema with rollUps are present.")
+        action="append",
+        help='"Roll up" columns from subsheets into the main sheet. Pass one or more JSON paths directly, or a file with one JSON path per line, or no value and use a schema containing (a) rollUp attribute(s). Schema takes precedence if both direct input and schema with rollUps are present.',
+    )
     parser_flatten.add_argument(
-        "-r", "--root-id",
-        help="Root ID of the data format, e.g. ocid for OCDS")
+        "-r", "--root-id", help="Root ID of the data format, e.g. ocid for OCDS"
+    )
     parser_flatten.add_argument(
         "--use-titles",
-        action='store_true',
-        help="Convert titles. Requires a schema to be specified.")
+        action="store_true",
+        help="Convert titles. Requires a schema to be specified.",
+    )
     parser_flatten.add_argument(
         "--truncation-length",
-        type=int, default=3,
-        help="The length of components of sub-sheet names (default 3).")
+        type=int,
+        default=3,
+        help="The length of components of sub-sheet names (default 3).",
+    )
     parser_flatten.add_argument(
         "--root-is-list",
-        action='store_true',
-        help="The root element is a list. --root-list-path and meta data will be ignored.")
+        action="store_true",
+        help="The root element is a list. --root-list-path and meta data will be ignored.",
+    )
     parser_flatten.add_argument(
         "--sheet-prefix",
-        help="A string to prefix to the start of every sheet (or file) name.")
+        help="A string to prefix to the start of every sheet (or file) name.",
+    )
     parser_flatten.add_argument(
         "--filter-field",
-        help="Data Filter - only data with this will be processed. Use with --filter-value")
+        help="Data Filter - only data with this will be processed. Use with --filter-value",
+    )
     parser_flatten.add_argument(
         "--filter-value",
-        help="Data Filter - only data with this will be processed. Use with --filter-field")
+        help="Data Filter - only data with this will be processed. Use with --filter-field",
+    )
     parser_flatten.add_argument(
         "--preserve-fields",
-        help="Only these fields will be processed. Pass a file with JSON paths to be preserved one per line.")
+        help="Only these fields will be processed. Pass a file with JSON paths to be preserved one per line.",
+    )
     parser_flatten.add_argument(
         "--disable-local-refs",
-        action='store_true',
-        help="Disable local refs when parsing JSON Schema.")
+        action="store_true",
+        help="Disable local refs when parsing JSON Schema.",
+    )
     parser_flatten.add_argument(
         "--remove-empty-schema-columns",
-        action='store_true',
-        help="When using flatten with a schema, remove columns and sheets from the output that contain no data.")
+        action="store_true",
+        help="When using flatten with a schema, remove columns and sheets from the output that contain no data.",
+    )
 
     parser_unflatten = subparsers.add_parser(
-        'unflatten',
-        help='Unflatten a spreadsheet')
+        "unflatten", help="Unflatten a spreadsheet"
+    )
     parser_unflatten.add_argument(
-        'input_name',
-        help="Name of the input file or directory.")
+        "input_name", help="Name of the input file or directory."
+    )
     parser_unflatten.add_argument(
-        "-f", "--input-format",
+        "-f",
+        "--input-format",
         help="File format of input file or directory.",
         choices=input_formats,
-        required=True)
+        required=True,
+    )
     parser_unflatten.add_argument(
-        "--xml",
-        action='store_true',
-        help="Use XML as the output format")
+        "--xml", action="store_true", help="Use XML as the output format"
+    )
     parser_unflatten.add_argument(
-        "--id-name",
-        help="String to use for the identifier key, defaults to 'id'")
+        "--id-name", help="String to use for the identifier key, defaults to 'id'"
+    )
     parser_unflatten.add_argument(
-        "-b", "--base-json",
-        help="A base json file to populate with the unflattened data.")
+        "-b",
+        "--base-json",
+        help="A base json file to populate with the unflattened data.",
+    )
     parser_unflatten.add_argument(
-        "-m", "--root-list-path",
-        help="The path in the JSON that will contain the unflattened list. Defaults to main.")
+        "-m",
+        "--root-list-path",
+        help="The path in the JSON that will contain the unflattened list. Defaults to main.",
+    )
     parser_unflatten.add_argument(
-        "-e", "--encoding",
-        help="Encoding of the input file(s) (only relevant for CSV). This can be any encoding recognised by Python. Defaults to utf8.")
+        "-e",
+        "--encoding",
+        help="Encoding of the input file(s) (only relevant for CSV). This can be any encoding recognised by Python. Defaults to utf8.",
+    )
     parser_unflatten.add_argument(
-        "-o", "--output-name",
-        help="Name of the outputted file. Will have an extension appended as appropriate.")
+        "-o",
+        "--output-name",
+        help="Name of the outputted file. Will have an extension appended as appropriate.",
+    )
     parser_unflatten.add_argument(
-        "-c", "--cell-source-map",
-        help="Path to write a cell source map to. Will have an extension appended as appropriate.")
+        "-c",
+        "--cell-source-map",
+        help="Path to write a cell source map to. Will have an extension appended as appropriate.",
+    )
     parser_unflatten.add_argument(
-        "-a", "--heading-source-map",
-        help="Path to write a heading source map to. Will have an extension appended as appropriate.")
+        "-a",
+        "--heading-source-map",
+        help="Path to write a heading source map to. Will have an extension appended as appropriate.",
+    )
     parser_unflatten.add_argument(
         "--timezone-name",
-        help="Name of the timezone, defaults to UTC. Should be in tzdata format, e.g. Europe/London")
-    parser_unflatten.add_argument(
-        "-r", "--root-id",
-        help="Root ID of the data format, e.g. ocid for OCDS")
+        help="Name of the timezone, defaults to UTC. Should be in tzdata format, e.g. Europe/London",
+    )
     parser_unflatten.add_argument(
-        "-s", "--schema",
-        help="Path to a relevant schema.")
+        "-r", "--root-id", help="Root ID of the data format, e.g. ocid for OCDS"
+    )
+    parser_unflatten.add_argument("-s", "--schema", help="Path to a relevant schema.")
     parser_unflatten.add_argument(
         "--convert-titles",
-        action='store_true',
-        help="Convert titles. Requires a schema to be specified.")
+        action="store_true",
+        help="Convert titles. Requires a schema to be specified.",
+    )
     parser_unflatten.add_argument(
         "--vertical-orientation",
-        action='store_true',
-        help="Read spreadsheet so that headings are in the first column and data is read vertically. Only for XLSX not CSV")
+        action="store_true",
+        help="Read spreadsheet so that headings are in the first column and data is read vertically. Only for XLSX not CSV",
+    )
     parser_unflatten.add_argument(
         "--metatab-name",
-        help="If supplied will assume there is a metadata tab with the given name")
+        help="If supplied will assume there is a metadata tab with the given name",
+    )
     parser_unflatten.add_argument(
-        "--metatab-schema",
-        help="The jsonschema of the metadata tab")
+        "--metatab-schema", help="The jsonschema of the metadata tab"
+    )
     parser_unflatten.add_argument(
-        "--metatab-only",
-        action='store_true',
-        help="Parse the metatab and nothing else")
+        "--metatab-only", action="store_true", help="Parse the metatab and nothing else"
+    )
     parser_unflatten.add_argument(
         "--metatab-vertical-orientation",
-        action='store_true',
-        help="Read metatab so that headings are in the first column and data is read vertically. Only for XLSX not CSV")
+        action="store_true",
+        help="Read metatab so that headings are in the first column and data is read vertically. Only for XLSX not CSV",
+    )
     parser_unflatten.add_argument(
         "--xml-schema",
-        dest='xml_schemas',
-        metavar='XML_SCHEMA',
-        nargs='*',
-        help="Path to one or more XML schemas (used for sorting)")
+        dest="xml_schemas",
+        metavar="XML_SCHEMA",
+        nargs="*",
+        help="Path to one or more XML schemas (used for sorting)",
+    )
     parser_unflatten.add_argument(
         "--default-configuration",
-        help="Comma seperated list of default parsing commands for all sheets. Only for XLSX not CSV")
+        help="Comma separated list of default parsing commands for all sheets. Only for XLSX not CSV",
+    )
     parser_unflatten.add_argument(
         "--root-is-list",
-        action='store_true',
-        help="The root element is a list. --root-list-path and meta data will be ignored.")
+        action="store_true",
+        help="The root element is a list. --root-list-path and meta data will be ignored.",
+    )
     parser_unflatten.add_argument(
         "--disable-local-refs",
-        action='store_true',
-        help="Disable local refs when parsing JSON Schema.")
+        action="store_true",
+        help="Disable local refs when parsing JSON Schema.",
+    )
     parser_unflatten.add_argument(
         "--xml-comment",
         required=False,
         default="XML generated by flatten-tool",
-        help="String comment of what generates the xml file")
+        help="String comment of what generates the xml file",
+    )
 
     return parser
 
@@ -258,16 +303,17 @@ def kwargs_from_parsed_args(args):
 
 def non_verbose_error_handler(type, value, traceback):
     if type == BadlyFormedJSONError:
-        sys.stderr.write('JSON error: {}\n'.format(value))
+        sys.stderr.write("JSON error: {}\n".format(value))
     else:
-        sys.stderr.write(str(value) + '\n')
+        sys.stderr.write(str(value) + "\n")
 
 
 default_warning_formatter = warnings.formatwarning
 
+
 def non_verbose_warning_formatter(message, category, filename, lineno, line=None):
     if issubclass(category, UserWarning):
-        return str(message) + '\n'
+        return str(message) + "\n"
     else:
         return default_warning_formatter(message, category, filename, lineno, line)
 
@@ -291,7 +337,7 @@ def main():
         sys.excepthook = non_verbose_error_handler
         warnings.formatwarning = non_verbose_warning_formatter
 
-    if args.subparser_name == 'create-template':
+    if args.subparser_name == "create-template":
         # Pass the arguments to the create_template function
         # If the schema file does not exist we catch it in this exception
         try:
@@ -300,11 +346,11 @@ def main():
         except (OSError, IOError) as e:
             print(str(e))
             return
-    elif args.subparser_name == 'flatten':
+    elif args.subparser_name == "flatten":
         flatten(**kwargs_from_parsed_args(args))
-    elif args.subparser_name == 'unflatten':
+    elif args.subparser_name == "unflatten":
         unflatten(**kwargs_from_parsed_args(args))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/flattentool/exceptions.py b/flattentool/exceptions.py
index 801a7ae7..8b979747 100644
--- a/flattentool/exceptions.py
+++ b/flattentool/exceptions.py
@@ -3,5 +3,5 @@ class DataErrorWarning(UserWarning):
     A warnings that indicates an error in the data, rather than the schema.
""" - pass + pass diff --git a/flattentool/input.py b/flattentool/input.py index b2d3ccb3..9d33d8c0 100644 --- a/flattentool/input.py +++ b/flattentool/input.py @@ -3,29 +3,23 @@ """ -from __future__ import print_function -from __future__ import unicode_literals -import sys -from decimal import Decimal, InvalidOperation +from __future__ import print_function, unicode_literals + +import datetime import os -from collections import OrderedDict -import openpyxl +from collections import OrderedDict, UserDict +from csv import DictReader +from csv import reader as csvreader +from decimal import Decimal, InvalidOperation from warnings import warn -import traceback -import datetime + +import openpyxl import pytz -from openpyxl.utils import column_index_from_string from openpyxl.utils.cell import _get_column_letter -from odf.opendocument import load as load_odf -import odf.table -from flattentool.ODSReader import ODSReader - from flattentool.exceptions import DataErrorWarning from flattentool.lib import isint, parse_sheet_configuration -from csv import DictReader -from csv import reader as csvreader -from collections import UserDict +from flattentool.ODSReader import ODSReader try: from zipfile import BadZipFile @@ -33,7 +27,6 @@ from zipfile import BadZipfile as BadZipFile - class Cell: def __init__(self, cell_value, cell_location): self.cell_value = cell_value @@ -41,58 +34,75 @@ def __init__(self, cell_value, cell_location): self.sub_cells = [] - -def convert_type(type_string, value, timezone = pytz.timezone('UTC')): - if value == '' or value is None: +def convert_type(type_string, value, timezone=pytz.timezone("UTC")): + if value == "" or value is None: return None - if type_string == 'number': + if type_string == "number": try: return Decimal(value) except (TypeError, ValueError, InvalidOperation): - warn('Non-numeric value "{}" found in number column, returning as string instead.'.format(value), - DataErrorWarning) + warn( + 'Non-numeric value "{}" found in number column, returning as string instead.'.format( + value + ), + DataErrorWarning, + ) return str(value) - elif type_string == 'integer': + elif type_string == "integer": try: return int(value) except (TypeError, ValueError): - warn('Non-integer value "{}" found in integer column, returning as string instead.'.format(value), - DataErrorWarning) + warn( + 'Non-integer value "{}" found in integer column, returning as string instead.'.format( + value + ), + DataErrorWarning, + ) return str(value) - elif type_string == 'boolean': + elif type_string == "boolean": value = str(value) - if value.lower() in ['true', '1']: + if value.lower() in ["true", "1"]: return True - elif value.lower() in ['false', '0']: + elif value.lower() in ["false", "0"]: return False else: - warn('Unrecognised value for boolean: "{}", returning as string instead'.format(value), - DataErrorWarning) + warn( + 'Unrecognised value for boolean: "{}", returning as string instead'.format( + value + ), + DataErrorWarning, + ) return str(value) - elif type_string in ('array', 'array_array', 'string_array', 'number_array'): + elif type_string in ("array", "array_array", "string_array", "number_array"): value = str(value) - if type_string == 'number_array': + if type_string == "number_array": try: - if ',' in value: - return [[Decimal(y) for y in x.split(',')] for x in value.split(';')] + if "," in value: + return [ + [Decimal(y) for y in x.split(",")] for x in value.split(";") + ] else: - return [Decimal(x) for x in value.split(';')] + return [Decimal(x) for x in 
value.split(";")] except (TypeError, ValueError, InvalidOperation): - warn('Non-numeric value "{}" found in number array column, returning as string array instead.'.format(value), - DataErrorWarning) - if ',' in value: - return [x.split(',') for x in value.split(';')] + warn( + 'Non-numeric value "{}" found in number array column, returning as string array instead.'.format( + value + ), + DataErrorWarning, + ) + if "," in value: + return [x.split(",") for x in value.split(";")] else: - return value.split(';') - elif type_string == 'string': + return value.split(";") + elif type_string == "string": if type(value) == datetime.datetime: return timezone.localize(value).isoformat() return str(value) - elif type_string == 'date': + elif type_string == "date": if type(value) == datetime.datetime: return value.date().isoformat() return str(value) - elif type_string == '': + elif type_string == "": if type(value) == datetime.datetime: return timezone.localize(value).isoformat() if type(value) == float and int(value) == value: @@ -104,7 +114,7 @@ def convert_type(type_string, value, timezone = pytz.timezone('UTC')): def warnings_for_ignored_columns(v, extra_message): if isinstance(v, Cell): - warn('Column {} has been ignored, {}'.format(v.cell_location[3], extra_message)) + warn("Column {} has been ignored, {}".format(v.cell_location[3], extra_message)) elif isinstance(v, dict): for x in v.values(): warnings_for_ignored_columns(x, extra_message) @@ -126,37 +136,82 @@ def merge(base, mergee, debug_info=None): if key in base: if isinstance(value, TemporaryDict): if not isinstance(base[key], TemporaryDict): - warnings_for_ignored_columns(v, 'because it treats {} as an array, but another column does not'.format(key)) + warnings_for_ignored_columns( + v, + "because it treats {} as an array, but another column does not".format( + key + ), + ) continue for temporarydict_key, temporarydict_value in value.items(): if temporarydict_key in base[key]: - merge(base[key][temporarydict_key], temporarydict_value, debug_info) + merge( + base[key][temporarydict_key], + temporarydict_value, + debug_info, + ) else: - assert temporarydict_key not in base[key], 'Overwriting cell {} by mistake'.format(temporarydict_value) + assert ( + temporarydict_key not in base[key] + ), "Overwriting cell {} by mistake".format(temporarydict_value) base[key][temporarydict_key] = temporarydict_value - for temporarydict_value in value.items_no_keyfield: + for temporarydict_value in value.items_no_keyfield: base[key].items_no_keyfield.append(temporarydict_value) elif isinstance(value, dict): if isinstance(base[key], dict): merge(base[key], value, debug_info) else: - warnings_for_ignored_columns(v, 'because it treats {} as an object, but another column does not'.format(key)) + warnings_for_ignored_columns( + v, + "because it treats {} as an object, but another column does not".format( + key + ), + ) else: if not isinstance(base[key], Cell): - id_info = '{} "{}"'.format(debug_info.get('id_name'), debug_info.get(debug_info.get('id_name'))) - if debug_info.get('root_id'): - id_info = '{} "{}", '.format(debug_info.get('root_id'), debug_info.get('root_id_or_none'))+id_info - warnings_for_ignored_columns(v, 'because another column treats it as an array or object'.format(key)) + id_info = '{} "{}"'.format( + debug_info.get("id_name"), + debug_info.get(debug_info.get("id_name")), + ) + if debug_info.get("root_id"): + id_info = ( + '{} "{}", '.format( + debug_info.get("root_id"), + debug_info.get("root_id_or_none"), + ) + + id_info + ) + 
+                        v,
+                        "because another column treats it as an array or object".format(
+                            key
+                        ),
+                    )
                     continue
                 base_value = base[key].cell_value
                 if base_value != value:
-                    id_info = '{} "{}"'.format(debug_info.get('id_name'), debug_info.get(debug_info.get('id_name')))
-                    if debug_info.get('root_id'):
-                        id_info = '{} "{}", '.format(debug_info.get('root_id'), debug_info.get('root_id_or_none'))+id_info
+                    id_info = '{} "{}"'.format(
+                        debug_info.get("id_name"),
+                        debug_info.get(debug_info.get("id_name")),
+                    )
+                    if debug_info.get("root_id"):
+                        id_info = (
+                            '{} "{}", '.format(
+                                debug_info.get("root_id"),
+                                debug_info.get("root_id_or_none"),
+                            )
+                            + id_info
+                        )
                     warn(
                         'You may have a duplicate Identifier: We couldn\'t merge these rows with the {}: field "{}" in sheet "{}": one cell has the value: "{}", the other cell has the value: "{}"'.format(
-                            id_info, key, debug_info.get('sheet_name'), base_value, value),
-                        DataErrorWarning)
+                            id_info,
+                            key,
+                            debug_info.get("sheet_name"),
+                            base_value,
+                            value,
+                        ),
+                        DataErrorWarning,
+                    )
                 else:
                     base[key].sub_cells.append(v)
         else:
@@ -171,6 +226,7 @@ class SpreadsheetInput(object):
     or csv).
 
     """
+
     def convert_dict_titles(self, dicts, title_lookup=None):
         """
         Replace titles with field names in the given list of dictionaries
@@ -181,25 +237,28 @@ def convert_dict_titles(self, dicts, title_lookup=None):
             title_lookup = self.parser.title_lookup
         for d in dicts:
             if title_lookup:
-                yield OrderedDict([(title_lookup.lookup_header(k), v) for k,v in d.items()])
+                yield OrderedDict(
+                    [(title_lookup.lookup_header(k), v) for k, v in d.items()]
+                )
             else:
                 yield d
 
-    def __init__(self,
-            input_name='',
-            root_list_path='main',
-            root_is_list=False,
-            timezone_name='UTC',
-            root_id='ocid',
-            convert_titles=False,
-            vertical_orientation=False,
-            include_sheets=[],
-            exclude_sheets=[],
-            id_name='id',
-            xml=False,
-            base_configuration={},
-            use_configuration=True
-            ):
+    def __init__(
+        self,
+        input_name="",
+        root_list_path="main",
+        root_is_list=False,
+        timezone_name="UTC",
+        root_id="ocid",
+        convert_titles=False,
+        vertical_orientation=False,
+        include_sheets=[],
+        exclude_sheets=[],
+        id_name="id",
+        xml=False,
+        base_configuration={},
+        use_configuration=True,
+    ):
         self.input_name = input_name
         self.root_list_path = root_list_path
         self.root_is_list = root_is_list
@@ -220,14 +279,20 @@ def __init__(self,
     def get_sub_sheets_lines(self):
         for sub_sheet_name in self.sub_sheet_names:
             if self.convert_titles:
-                yield sub_sheet_name, self.convert_dict_titles(self.get_sheet_lines(sub_sheet_name),
-                    self.parser.sub_sheets[sub_sheet_name].title_lookup if sub_sheet_name in self.parser.sub_sheets else None)
+                yield sub_sheet_name, self.convert_dict_titles(
+                    self.get_sheet_lines(sub_sheet_name),
+                    self.parser.sub_sheets[sub_sheet_name].title_lookup
+                    if sub_sheet_name in self.parser.sub_sheets
+                    else None,
+                )
             else:
                 yield sub_sheet_name, self.get_sheet_lines(sub_sheet_name)
 
     def configure_sheets(self):
         for sub_sheet_name in self.sub_sheet_names:
-            self.sheet_configuration[sub_sheet_name] = parse_sheet_configuration(self.get_sheet_configuration(sub_sheet_name))
+            self.sheet_configuration[sub_sheet_name] = parse_sheet_configuration(
+                self.get_sheet_configuration(sub_sheet_name)
+            )
 
     def get_sheet_configuration(self, sheet_name):
         return []
@@ -259,57 +324,59 @@ def do_unflatten(self):
                     if actual_heading is None:
                         continue
                     if actual_heading in found:
-                        found[actual_heading].append((last_col-i)-1)
+                        found[actual_heading].append((last_col - i) - 1)
                     else:
                         found[actual_heading] = [i]
                 for actual_heading in reversed(found):
                     if len(found[actual_heading]) > 1:
-                        keeping = found[actual_heading][0]
+                        keeping = found[actual_heading][0]  # noqa
                         ignoring = found[actual_heading][1:]
                         ignoring.reverse()
                         if len(ignoring) >= 3:
                             warn(
                                 (
                                     'Duplicate heading "{}" found, ignoring '
-                                    'the data in columns {} and {}.'
+                                    "the data in columns {} and {}."
                                 ).format(
                                     actual_heading,
-                                    ', '.join(
-                                        [_get_column_letter(x+1) for x in ignoring[:-1]]
+                                    ", ".join(
+                                        [
+                                            _get_column_letter(x + 1)
+                                            for x in ignoring[:-1]
+                                        ]
                                     ),
                                     _get_column_letter(ignoring[-1] + 1),
                                 ),
-                                DataErrorWarning
+                                DataErrorWarning,
                             )
                         elif len(found[actual_heading]) == 3:
                             warn(
                                 (
                                     'Duplicate heading "{}" found, ignoring '
-                                    'the data in columns {} and {}.'
+                                    "the data in columns {} and {}."
                                 ).format(
                                     actual_heading,
                                     _get_column_letter(ignoring[0] + 1),
                                     _get_column_letter(ignoring[1] + 1),
                                 ),
-                                DataErrorWarning
+                                DataErrorWarning,
                             )
                         else:
                             warn(
                                 (
                                     'Duplicate heading "{}" found, ignoring '
-                                    'the data in column {}.'
+                                    "the data in column {}."
                                 ).format(
-                                    actual_heading,
-                                    _get_column_letter(ignoring[0]+1),
+                                    actual_heading, _get_column_letter(ignoring[0] + 1),
                                 ),
-                                DataErrorWarning
+                                DataErrorWarning,
                             )
             except NotImplementedError:
                 # The ListInput type used in the tests doesn't support getting headings.
                 actual_headings = None
             for j, line in enumerate(lines):
-                if all(x is None or x == '' for x in line.values()):
-                    #if all(x == '' for x in line.values()):
+                if all(x is None or x == "" for x in line.values()):
+                    # if all(x == '' for x in line.values()):
                     continue
                 root_id_or_none = line.get(self.root_id) if self.root_id else None
                 cells = OrderedDict()
@@ -319,32 +386,49 @@ def do_unflatten(self):
                         # This is misleading as it specifies the row number as the distance vertically
                         # and the horizontal 'letter' as a number.
                         # https://github.com/OpenDataServices/flatten-tool/issues/153
-                        cells[header] = Cell(line[header], (sheet_name, str(k+1), j+2, heading))
+                        cells[header] = Cell(
+                            line[header], (sheet_name, str(k + 1), j + 2, heading)
+                        )
                     else:
-                        cells[header] = Cell(line[header], (sheet_name, _get_column_letter(k+1), j+2, heading))
-                unflattened = unflatten_main_with_parser(self.parser, cells, self.timezone, self.xml, self.id_name)
+                        cells[header] = Cell(
+                            line[header],
+                            (sheet_name, _get_column_letter(k + 1), j + 2, heading),
+                        )
+                unflattened = unflatten_main_with_parser(
+                    self.parser, cells, self.timezone, self.xml, self.id_name
+                )
                 if root_id_or_none not in main_sheet_by_ocid:
-                    main_sheet_by_ocid[root_id_or_none] = TemporaryDict(self.id_name, xml=self.xml)
+                    main_sheet_by_ocid[root_id_or_none] = TemporaryDict(
+                        self.id_name, xml=self.xml
+                    )
+
                 def inthere(unflattened, id_name):
                     if self.xml:
-                        return unflattened[id_name]['text()'].cell_value
+                        return unflattened[id_name]["text()"].cell_value
                     else:
                         return unflattened[id_name].cell_value
-                if self.id_name in unflattened and inthere(unflattened, self.id_name) in main_sheet_by_ocid[root_id_or_none]:
+
+                if (
+                    self.id_name in unflattened
+                    and inthere(unflattened, self.id_name)
+                    in main_sheet_by_ocid[root_id_or_none]
+                ):
                     if self.xml:
-                        unflattened_id = unflattened.get(self.id_name)['text()'].cell_value
+                        unflattened_id = unflattened.get(self.id_name)[
+                            "text()"
+                        ].cell_value
                     else:
                         unflattened_id = unflattened.get(self.id_name).cell_value
                     merge(
                         main_sheet_by_ocid[root_id_or_none][unflattened_id],
                         unflattened,
                         {
-                            'sheet_name': sheet_name,
-                            'root_id': self.root_id,
-                            'root_id_or_none': root_id_or_none,
-                            'id_name': self.id_name,
-                            self.id_name: unflattened_id
-                        }
+                            "sheet_name": sheet_name,
+                            "root_id": self.root_id,
+                            "root_id_or_none": root_id_or_none,
+                            "id_name": self.id_name,
+                            self.id_name: unflattened_id,
+                        },
                     )
                 else:
                     main_sheet_by_ocid[root_id_or_none].append(unflattened)
@@ -362,14 +446,16 @@ def fancy_unflatten(self, with_cell_source_map, with_heading_source_map):
         ordered_cell_source_map = None
         heading_source_map = None
         if with_cell_source_map or with_heading_source_map:
-            cell_source_map = extract_list_to_error_path([] if self.root_is_list else [self.root_list_path], cell_tree)
+            cell_source_map = extract_list_to_error_path(
+                [] if self.root_is_list else [self.root_list_path], cell_tree
+            )
             ordered_items = sorted(cell_source_map.items())
             row_source_map = OrderedDict()
             heading_source_map = OrderedDict()
             for path, _ in ordered_items:
                 cells = cell_source_map[path]
                 # Prepare row_source_map key
-                key = '/'.join(str(x) for x in path[:-1])
+                key = "/".join(str(x) for x in path[:-1])
                 if not key in row_source_map:
                     row_source_map[key] = []
                 if with_heading_source_map:
@@ -380,7 +466,7 @@ def fancy_unflatten(self, with_cell_source_map, with_heading_source_map):
                         int(x)
                     except:
                         header_path_parts.append(x)
-                    header_path = '/'.join(header_path_parts)
+                    header_path = "/".join(header_path_parts)
                     if header_path not in heading_source_map:
                         heading_source_map[header_path] = []
                 # Populate the row and header source maps
@@ -392,51 +478,66 @@ def fancy_unflatten(self, with_cell_source_map, with_heading_source_map):
                     if (sheet, header) not in heading_source_map[header_path]:
                         heading_source_map[header_path].append((sheet, header))
         if with_cell_source_map:
-            ordered_cell_source_map = OrderedDict(( '/'.join(str(x) for x in path), location) for path, location in ordered_items)
+            ordered_cell_source_map = OrderedDict(
+                ("/".join(str(x) for x in path), location)
+                for path, location in ordered_items
+            )
             for key in row_source_map:
-                assert key not in ordered_cell_source_map, 'Row/cell collision: {}'.format(key)
+                assert (
+                    key not in ordered_cell_source_map
+                ), "Row/cell collision: {}".format(key)
                 ordered_cell_source_map[key] = row_source_map[key]
         return result, ordered_cell_source_map, heading_source_map
 
+
 def extract_list_to_error_path(path, input):
     output = {}
     for i, item in enumerate(input):
         res = extract_dict_to_error_path(path + [i], item)
         for p in res:
-            assert p not in output, 'Already have key {}'.format(p)
+            assert p not in output, "Already have key {}".format(p)
             output[p] = res[p]
     return output
 
+
 def extract_dict_to_error_path(path, input):
     output = {}
     for k in input:
         if isinstance(input[k], list):
-            res = extract_list_to_error_path(path+[k], input[k])
+            res = extract_list_to_error_path(path + [k], input[k])
             for p in res:
-                assert p not in output, 'Already have key {}'.format(p)
+                assert p not in output, "Already have key {}".format(p)
                 output[p] = res[p]
         elif isinstance(input[k], dict):
-            res = extract_dict_to_error_path(path+[k], input[k])
+            res = extract_dict_to_error_path(path + [k], input[k])
             for p in res:
-                assert p not in output, 'Already have key {}'.format(p)
+                assert p not in output, "Already have key {}".format(p)
                 output[p] = res[p]
         elif isinstance(input[k], Cell):
-            p = tuple(path+[k])
-            assert p not in output, 'Already have key {}'.format(p)
+            p = tuple(path + [k])
+            assert p not in output, "Already have key {}".format(p)
             output[p] = [input[k].cell_location]
             for sub_cell in input[k].sub_cells:
-                assert sub_cell.cell_value == input[k].cell_value, 'Two sub-cells have different values: {}, {}'.format(input[k].cell_value, sub_cell.cell_value)
+                assert (
+                    sub_cell.cell_value == input[k].cell_value
+                ), "Two sub-cells have different values: {}, {}".format(
+                    input[k].cell_value, sub_cell.cell_value
+                )
                 output[p].append(sub_cell.cell_location)
         else:
-            raise Exception('Unexpected result type in the JSON cell tree: {}'.format(input[k]))
+            raise Exception(
+                "Unexpected result type in the JSON cell tree: {}".format(input[k])
+            )
     return output
 
+
 def extract_list_to_value(input):
     output = []
     for item in input:
         output.append(extract_dict_to_value(item))
     return output
 
+
 def extract_dict_to_value(input):
     output = OrderedDict()
     for k in input:
@@ -447,12 +548,14 @@ def extract_dict_to_value(input):
         elif isinstance(input[k], Cell):
             output[k] = input[k].cell_value
         else:
-            raise Exception('Unexpected result type in the JSON cell tree: {}'.format(input[k]))
+            raise Exception(
+                "Unexpected result type in the JSON cell tree: {}".format(input[k])
+            )
     return output
 
 
 class CSVInput(SpreadsheetInput):
-    encoding = 'utf-8'
+    encoding = "utf-8"
 
     def get_sheet_headings(self, sheet_name):
         sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
@@ -466,7 +569,9 @@ def get_sheet_headings(self, sheet_name):
             # returning empty headers is a proxy for no data in the sheet.
return [] - with open(os.path.join(self.input_name, sheet_name+'.csv'), encoding=self.encoding) as main_sheet_file: + with open( + os.path.join(self.input_name, sheet_name + ".csv"), encoding=self.encoding + ) as main_sheet_file: r = csvreader(main_sheet_file) for num, row in enumerate(r): if num == (skip_rows + configuration_line): @@ -474,7 +579,9 @@ def get_sheet_headings(self, sheet_name): def read_sheets(self): sheet_file_names = os.listdir(self.input_name) - sheet_names = sorted([fname[:-4] for fname in sheet_file_names if fname.endswith('.csv')]) + sheet_names = sorted( + [fname[:-4] for fname in sheet_file_names if fname.endswith(".csv")] + ) if self.include_sheets: for sheet in list(sheet_names): if sheet not in self.include_sheets: @@ -485,7 +592,9 @@ def read_sheets(self): except ValueError: pass self.sub_sheet_names = sheet_names - self.sheet_names_map = OrderedDict((sheet_name, sheet_name) for sheet_name in sheet_names) + self.sheet_names_map = OrderedDict( + (sheet_name, sheet_name) for sheet_name in sheet_names + ) self.configure_sheets() def generate_rows(self, dictreader, sheet_name): @@ -499,7 +608,7 @@ def generate_rows(self, dictreader, sheet_name): skip_rows = sheet_configuration.get("skipRows", 0) header_rows = sheet_configuration.get("headerRows", 1) for i in range(0, configuration_line + skip_rows): - previous_row = next(dictreader.reader) + previous_row = next(dictreader.reader) # noqa fieldnames = dictreader.fieldnames for i in range(0, header_rows - 1): next(dictreader.reader) @@ -507,18 +616,20 @@ def generate_rows(self, dictreader, sheet_name): yield OrderedDict((fieldname, line[fieldname]) for fieldname in fieldnames) def get_sheet_configuration(self, sheet_name): - with open(os.path.join(self.input_name, sheet_name+'.csv'), encoding=self.encoding) as main_sheet_file: + with open( + os.path.join(self.input_name, sheet_name + ".csv"), encoding=self.encoding + ) as main_sheet_file: r = csvreader(main_sheet_file) heading_row = next(r) - if len(heading_row) > 0 and heading_row[0] == '#': + if len(heading_row) > 0 and heading_row[0] == "#": return heading_row[1:] return [] - - def get_sheet_lines(self, sheet_name): # Pass the encoding to the open function - with open(os.path.join(self.input_name, sheet_name+'.csv'), encoding=self.encoding) as main_sheet_file: + with open( + os.path.join(self.input_name, sheet_name + ".csv"), encoding=self.encoding + ) as main_sheet_file: dictreader = DictReader(main_sheet_file) for row in self.generate_rows(dictreader, sheet_name): yield row @@ -532,11 +643,15 @@ class XLSXInput(SpreadsheetInput): def read_sheets(self): try: self.workbook = openpyxl.load_workbook(self.input_name, data_only=True) - except BadZipFile as e: + except BadZipFile as e: # noqa # TODO when we have python3 only add 'from e' to show exception chain - raise BadXLSXZipFile("The supplied file has extension .xlsx but isn't an XLSX file.") + raise BadXLSXZipFile( + "The supplied file has extension .xlsx but isn't an XLSX file." 
+ ) - self.sheet_names_map = OrderedDict((sheet_name, sheet_name) for sheet_name in self.workbook.sheetnames) + self.sheet_names_map = OrderedDict( + (sheet_name, sheet_name) for sheet_name in self.workbook.sheetnames + ) if self.include_sheets: for sheet in list(self.sheet_names_map): if sheet not in self.include_sheets: @@ -558,24 +673,36 @@ def get_sheet_headings(self, sheet_name): sheet_configuration = {} skip_rows = sheet_configuration.get("skipRows", 0) - if (sheet_configuration.get("ignore") or - (sheet_configuration.get("hashcomments") and sheet_name.startswith('#'))): + if sheet_configuration.get("ignore") or ( + sheet_configuration.get("hashcomments") and sheet_name.startswith("#") + ): # returning empty headers is a proxy for no data in the sheet. return [] if self.vertical_orientation: - return [cell.value for cell in worksheet[_get_column_letter(skip_rows + 1)][configuration_line:]] + return [ + cell.value + for cell in worksheet[_get_column_letter(skip_rows + 1)][ + configuration_line: + ] + ] try: - return [cell.value for cell in worksheet[skip_rows + configuration_line + 1]] + return [ + cell.value for cell in worksheet[skip_rows + configuration_line + 1] + ] except IndexError: # If the heading line is after data in the spreadsheet. i.e when skipRows return [] def get_sheet_configuration(self, sheet_name): worksheet = self.workbook[self.sheet_names_map[sheet_name]] - if worksheet['A1'].value == '#': - return [cell.value for num, cell in enumerate(worksheet[1]) if num != 0 and cell.value] + if worksheet["A1"].value == "#": + return [ + cell.value + for num, cell in enumerate(worksheet[1]) + if num != 0 and cell.value + ] else: return [] @@ -590,17 +717,20 @@ def get_sheet_lines(self, sheet_name): skip_rows = sheet_configuration.get("skipRows", 0) header_rows = sheet_configuration.get("headerRows", 1) - worksheet = self.workbook[self.sheet_names_map[sheet_name]] if self.vertical_orientation: header_row = worksheet[_get_column_letter(skip_rows + 1)] remaining_rows = worksheet.iter_cols(min_col=skip_rows + header_rows + 1) if configuration_line: header_row = header_row[1:] - remaining_rows = worksheet.iter_cols(min_col=skip_rows + header_rows + 1, min_row=2) + remaining_rows = worksheet.iter_cols( + min_col=skip_rows + header_rows + 1, min_row=2 + ) else: header_row = worksheet[skip_rows + configuration_line + 1] - remaining_rows = worksheet.iter_rows(min_row=skip_rows + configuration_line + header_rows + 1) + remaining_rows = worksheet.iter_rows( + min_row=skip_rows + configuration_line + header_rows + 1 + ) coli_to_header = {} for i, header in enumerate(header_row): @@ -614,7 +744,7 @@ def get_sheet_lines(self, sheet_name): if not header: # None means that the cell will be ignored value = None - elif sheet_configuration.get("hashcomments") and header.startswith('#'): + elif sheet_configuration.get("hashcomments") and header.startswith("#"): # None means that the cell will be ignored value = None output_row[header] = value @@ -640,10 +770,10 @@ def read_sheets(self): def _resolve_sheet_configuration(self, sheet_name): sheet_configuration = self.sheet_configuration[sheet_name] if not self.use_configuration: - return {'unused_config_line': True} if sheet_configuration else {} + return {"unused_config_line": True} if sheet_configuration else {} if not sheet_configuration: sheet_configuration = self.base_configuration - sheet_configuration['base_configuration'] = True + sheet_configuration["base_configuration"] = True return sheet_configuration @@ -651,16 +781,25 @@ def 
get_sheet_headings(self, sheet_name): worksheet = self.sheet_names_map[sheet_name] sheet_configuration = self._resolve_sheet_configuration(sheet_name) - configuration_line = 1 if sheet_configuration and 'base_configuration' not in sheet_configuration else 0 + configuration_line = ( + 1 + if sheet_configuration and "base_configuration" not in sheet_configuration + else 0 + ) skip_rows = sheet_configuration.get("skipRows", 0) - if (sheet_configuration.get("ignore") or - (sheet_configuration.get("hashcomments") and sheet_name.startswith('#'))): + if sheet_configuration.get("ignore") or ( + sheet_configuration.get("hashcomments") and sheet_name.startswith("#") + ): # returning empty headers is a proxy for no data in the sheet. return [] if self.vertical_orientation: - return [row[skip_rows] for row in worksheet[configuration_line:] if len(row) > skip_rows] + return [ + row[skip_rows] + for row in worksheet[configuration_line:] + if len(row) > skip_rows + ] try: return [cell for cell in worksheet[skip_rows + configuration_line]] @@ -675,7 +814,7 @@ def get_sheet_configuration(self, sheet_name): try: # cell A1 - if worksheet[0][0] == '#': + if worksheet[0][0] == "#": return worksheet[0] except IndexError: @@ -690,26 +829,41 @@ def get_sheet_lines(self, sheet_name): # yield OrderedDict([('a/b', '4'), ('a/c', '5'), ('d', '6')]) sheet_configuration = self._resolve_sheet_configuration(sheet_name) - configuration_line = 1 if sheet_configuration and 'base_configuration' not in sheet_configuration else 0 + configuration_line = ( + 1 + if sheet_configuration and "base_configuration" not in sheet_configuration + else 0 + ) skip_rows = sheet_configuration.get("skipRows", 0) header_rows = sheet_configuration.get("headerRows", 1) worksheet = self.sheet_names_map[sheet_name] if self.vertical_orientation: - header_row = [row[skip_rows] for row in worksheet[configuration_line:] if len(row) > skip_rows] - longest_horizonal_row = max(len(row) for row in worksheet[configuration_line:]) - remaining_rows = [[row[i] if len(row) > i else None for row in worksheet[configuration_line:] if row] for i in range(1, longest_horizonal_row)] + header_row = [ + row[skip_rows] + for row in worksheet[configuration_line:] + if len(row) > skip_rows + ] + longest_horizonal_row = max( + len(row) for row in worksheet[configuration_line:] + ) + remaining_rows = [ + [ + row[i] if len(row) > i else None + for row in worksheet[configuration_line:] + if row + ] + for i in range(1, longest_horizonal_row) + ] else: header_row = worksheet[skip_rows + configuration_line] - remaining_rows = worksheet[(skip_rows + configuration_line - + header_rows):] + remaining_rows = worksheet[(skip_rows + configuration_line + header_rows) :] coli_to_header = {} for i, header in enumerate(header_row): coli_to_header[i] = header - for row in remaining_rows: output_row = OrderedDict() for i, x in enumerate(row): @@ -719,7 +873,7 @@ def get_sheet_lines(self, sheet_name): if not header: # None means that the cell will be ignored value = None - elif sheet_configuration.get("hashcomments") and header.startswith('#'): + elif sheet_configuration.get("hashcomments") and header.startswith("#"): # None means that the cell will be ignored value = None output_row[header] = value @@ -728,22 +882,18 @@ def get_sheet_lines(self, sheet_name): yield output_row -FORMATS = { - 'xlsx': XLSXInput, - 'csv': CSVInput, - 'ods': ODSInput -} - +FORMATS = {"xlsx": XLSXInput, "csv": CSVInput, "ods": ODSInput} class ListAsDict(dict): pass + def 
list_as_dicts_to_temporary_dicts(unflattened, id_name, xml): for key, value in list(unflattened.items()): if isinstance(value, Cell): continue - if hasattr(value, 'items'): + if hasattr(value, "items"): if not value: unflattened.pop(key) list_as_dicts_to_temporary_dicts(value, id_name, xml) @@ -759,94 +909,126 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name): unflattened = OrderedDict() for path, cell in line.items(): # Skip blank cells - if cell.cell_value is None or cell.cell_value == '': + if cell.cell_value is None or cell.cell_value == "": continue current_path = unflattened - path_list = [item.rstrip('[]') for item in str(path).split('/')] + path_list = [item.rstrip("[]") for item in str(path).split("/")] for num, path_item in enumerate(path_list): if isint(path_item): if num == 0: - warn('Column "{}" has been ignored because it is a number.'.format(path), DataErrorWarning) + warn( + 'Column "{}" has been ignored because it is a number.'.format( + path + ), + DataErrorWarning, + ) continue current_type = None - path_till_now = '/'.join([item for item in path_list[:num + 1] if not isint(item)]) + path_till_now = "/".join( + [item for item in path_list[: num + 1] if not isint(item)] + ) if parser: current_type = parser.flattened.get(path_till_now) try: next_path_item = path_list[num + 1] except IndexError: - next_path_item = '' + next_path_item = "" # Quick solution to avoid casting of date as datetinme in spreadsheet > xml if xml: if type(cell.cell_value) == datetime.datetime and not next_path_item: - if 'datetime' not in path: - current_type = 'date' + if "datetime" not in path: + current_type = "date" ## Array list_index = -1 if isint(next_path_item): - if current_type and current_type != 'array': - raise ValueError("There is an array at '{}' when the schema says there should be a '{}'".format(path_till_now, current_type)) + if current_type and current_type != "array": + raise ValueError( + "There is an array at '{}' when the schema says there should be a '{}'".format( + path_till_now, current_type + ) + ) list_index = int(next_path_item) - current_type = 'array' + current_type = "array" - if current_type == 'array': + if current_type == "array": list_as_dict = current_path.get(path_item) if list_as_dict is None: list_as_dict = ListAsDict() current_path[path_item] = list_as_dict elif type(list_as_dict) is not ListAsDict: - warn('Column {} has been ignored, because it treats {} as an array, but another column does not.'.format(path, path_till_now), - DataErrorWarning) + warn( + "Column {} has been ignored, because it treats {} as an array, but another column does not.".format( + path, path_till_now + ), + DataErrorWarning, + ) break new_path = list_as_dict.get(list_index) if new_path is None: new_path = OrderedDict() list_as_dict[list_index] = new_path current_path = new_path - if not xml or num < len(path_list)-2: + if not xml or num < len(path_list) - 2: # In xml "arrays" can have text values, if they're the final element # This corresponds to a tag with text, but also possibly attributes continue ## Object - if current_type == 'object' or (not current_type and next_path_item): + if current_type == "object" or (not current_type and next_path_item): new_path = current_path.get(path_item) if new_path is None: new_path = OrderedDict() current_path[path_item] = new_path - elif type(new_path) is ListAsDict or not hasattr(new_path, 'items'): - warn('Column {} has been ignored, because it treats {} as an object, but another column does not.'.format(path, 
path_till_now), - DataErrorWarning) + elif type(new_path) is ListAsDict or not hasattr(new_path, "items"): + warn( + "Column {} has been ignored, because it treats {} as an object, but another column does not.".format( + path, path_till_now + ), + DataErrorWarning, + ) break current_path = new_path continue - if current_type and current_type not in ['object', 'array'] and next_path_item: - raise ValueError("There is an object or list at '{}' but it should be an {}".format(path_till_now, current_type)) + if ( + current_type + and current_type not in ["object", "array"] + and next_path_item + ): + raise ValueError( + "There is an object or list at '{}' but it should be an {}".format( + path_till_now, current_type + ) + ) ## Other Types current_path_value = current_path.get(path_item) - if not xml and (type(current_path_value) is ListAsDict or hasattr(current_path_value, 'items')): + if not xml and ( + type(current_path_value) is ListAsDict + or hasattr(current_path_value, "items") + ): # ^ # xml can have an object/array that also has a text value warn( - 'Column {} has been ignored, because another column treats it as an array or object'.format( - path_till_now), - DataErrorWarning) + "Column {} has been ignored, because another column treats it as an array or object".format( + path_till_now + ), + DataErrorWarning, + ) continue value = cell.cell_value - if xml and current_type == 'array': + if xml and current_type == "array": # In xml "arrays" can have text values, if they're the final element # However the type of the text value itself should not be "array", # as that would split the text on commas, which we don't want. # https://github.com/OpenDataServices/cove/issues/1030 - converted_value = convert_type('', value, timezone) + converted_value = convert_type("", value, timezone) else: - converted_value = convert_type(current_type or '', value, timezone) + converted_value = convert_type(current_type or "", value, timezone) cell.cell_value = converted_value - if converted_value is not None and converted_value != '': + if converted_value is not None and converted_value != "": if xml: # For XML we want to support text and attributes at the # same level, e.g. @@ -855,15 +1037,15 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name): # {"@a":"b", "text()": "some text"} # To ensure we can attach attributes everywhere, all # element text must be added as a dict with a `text()` key. 
- if path_item.startswith('@'): + if path_item.startswith("@"): current_path[path_item] = cell else: - if current_type == 'array': - current_path['text()'] = cell + if current_type == "array": + current_path["text()"] = cell elif path_item not in current_path: - current_path[path_item] = {'text()': cell} + current_path[path_item] = {"text()": cell} else: - current_path[path_item]['text()'] = cell + current_path[path_item]["text()"] = cell else: current_path[path_item] = cell @@ -871,36 +1053,43 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name): return unflattened - -def path_search(nested_dict, path_list, id_fields=None, path=None, top=False, top_sheet=False): +def path_search( + nested_dict, path_list, id_fields=None, path=None, top=False, top_sheet=False +): if not path_list: return nested_dict id_fields = id_fields or {} parent_field = path_list[0] - path = parent_field if path is None else path+'/'+parent_field + path = parent_field if path is None else path + "/" + parent_field - if parent_field.endswith('[]') or top: - if parent_field.endswith('[]'): + if parent_field.endswith("[]") or top: + if parent_field.endswith("[]"): parent_field = parent_field[:-2] if parent_field not in nested_dict: - nested_dict[parent_field] = TemporaryDict(keyfield=id_name, top_sheet=top_sheet, xml=xml) - sub_sheet_id = id_fields.get(path+'/id') + nested_dict[parent_field] = TemporaryDict( + keyfield=id_name, top_sheet=top_sheet, xml=xml # noqa + ) + sub_sheet_id = id_fields.get(path + "/id") if sub_sheet_id not in nested_dict[parent_field]: nested_dict[parent_field][sub_sheet_id] = {} - return path_search(nested_dict[parent_field][sub_sheet_id], - path_list[1:], - id_fields=id_fields, - path=path, - top_sheet=top_sheet) + return path_search( + nested_dict[parent_field][sub_sheet_id], + path_list[1:], + id_fields=id_fields, + path=path, + top_sheet=top_sheet, + ) else: if parent_field not in nested_dict: nested_dict[parent_field] = OrderedDict() - return path_search(nested_dict[parent_field], - path_list[1:], - id_fields=id_fields, - path=path, - top_sheet=top_sheet) + return path_search( + nested_dict[parent_field], + path_list[1:], + id_fields=id_fields, + path=path, + top_sheet=top_sheet, + ) class TemporaryDict(UserDict): @@ -912,15 +1101,17 @@ def __init__(self, keyfield, top_sheet=False, xml=False): self.xml = xml def __repr__(self): - return 'TemporaryDict(keyfield={}, items_no_keyfield={}, data={})'.format(repr(self.keyfield), repr(self.items_no_keyfield), repr(self.data)) + return "TemporaryDict(keyfield={}, items_no_keyfield={}, data={})".format( + repr(self.keyfield), repr(self.items_no_keyfield), repr(self.data) + ) def append(self, item): if self.keyfield in item: if self.xml: - if isinstance(item[self.keyfield]['text()'], Cell): - key = item[self.keyfield]['text()'].cell_value + if isinstance(item[self.keyfield]["text()"], Cell): + key = item[self.keyfield]["text()"].cell_value else: - key = item[self.keyfield]['text()'] + key = item[self.keyfield]["text()"] else: if isinstance(item[self.keyfield], Cell): key = item[self.keyfield].cell_value @@ -942,11 +1133,11 @@ def temporarydicts_to_lists(nested_dict): for key, value in nested_dict.items(): if isinstance(value, Cell): continue - if hasattr(value, 'to_list'): + if hasattr(value, "to_list"): temporarydicts_to_lists(value) - if hasattr(value, 'items_no_keyfield'): + if hasattr(value, "items_no_keyfield"): for x in value.items_no_keyfield: temporarydicts_to_lists(x) nested_dict[key] = value.to_list() - elif 
hasattr(value, 'items'): + elif hasattr(value, "items"): temporarydicts_to_lists(value) diff --git a/flattentool/json_input.py b/flattentool/json_input.py index d612787d..8401f758 100644 --- a/flattentool/json_input.py +++ b/flattentool/json_input.py @@ -5,18 +5,20 @@ """ -import os -import json +import codecs import copy +import json +import os from collections import OrderedDict from decimal import Decimal -from flattentool.schema import SchemaParser, make_sub_sheet_name -from flattentool.input import path_search -from flattentool.sheet import Sheet from warnings import warn -import codecs + import xmltodict +from flattentool.input import path_search +from flattentool.schema import make_sub_sheet_name +from flattentool.sheet import Sheet + BASIC_TYPES = [str, bool, int, Decimal, type(None)] @@ -33,6 +35,7 @@ def sheet_key_field(sheet, key): sheet.append(key) return key + def sheet_key_title(sheet, key): """ If the key has a corresponding title, return that. If doesn't, create it in the sheet and return it. @@ -67,23 +70,23 @@ def dicts_to_list_of_dicts(lists_of_dicts_paths_set, xml_dict, path=()): if isinstance(value, list): for x in value: if isinstance(x, dict): - dicts_to_list_of_dicts(lists_of_dicts_paths_set, x, path+(key,)) + dicts_to_list_of_dicts(lists_of_dicts_paths_set, x, path + (key,)) elif isinstance(value, dict): - child_path = path+(key,) + child_path = path + (key,) dicts_to_list_of_dicts(lists_of_dicts_paths_set, value, child_path) if child_path in lists_of_dicts_paths_set: xml_dict[key] = [value] def list_dict_consistency(xml_dict): - ''' + """ For use with XML files opened with xmltodict. If there is only one tag, xmltodict produces a dict. If there are multiple, xmltodict produces a list of dicts. This functions replaces dicts with lists of dicts, if there exists a list of dicts for the same path elsewhere in the file. - ''' + """ lists_of_dicts_paths_set = set(lists_of_dicts_paths(xml_dict)) dicts_to_list_of_dicts(lists_of_dicts_paths_set, xml_dict) @@ -92,10 +95,23 @@ class JSONParser(object): # Named for consistency with schema.SchemaParser, but not sure it's the most appropriate name. # Similarily with methods like parse_json_dict - def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None, root_list_path=None, - root_id='ocid', use_titles=False, xml=False, id_name='id', filter_field=None, - filter_value=None, preserve_fields=None, remove_empty_schema_columns=False, - rollup=False, truncation_length=3): + def __init__( + self, + json_filename=None, + root_json_dict=None, + schema_parser=None, + root_list_path=None, + root_id="ocid", + use_titles=False, + xml=False, + id_name="id", + filter_field=None, + filter_value=None, + preserve_fields=None, + remove_empty_schema_columns=False, + rollup=False, + truncation_length=3, + ): self.sub_sheets = {} self.main_sheet = Sheet() self.root_list_path = root_list_path @@ -108,7 +124,7 @@ def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None, self.filter_value = filter_value self.remove_empty_schema_columns = remove_empty_schema_columns self.seen_paths = set() - + if schema_parser: self.main_sheet = copy.deepcopy(schema_parser.main_sheet) self.sub_sheets = copy.deepcopy(schema_parser.sub_sheets) @@ -127,8 +143,10 @@ def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None, if schema_parser and len(schema_parser.rollup) > 0: # If rollUp is present in the schema this takes precedence over direct input. 
                self.rollup = schema_parser.rollup
-            if isinstance(rollup, (list,)) and (len(rollup) > 1 or (len(rollup) == 1 and rollup[0] is not True)):
-                warn('Using rollUp values from schema, ignoring direct input.')
+            if isinstance(rollup, (list,)) and (
+                len(rollup) > 1 or (len(rollup) == 1 and rollup[0] is not True)
+            ):
+                warn("Using rollUp values from schema, ignoring direct input.")
         elif isinstance(rollup, (list,)):
             if len(rollup) == 1 and os.path.isfile(rollup[0]):
                 # Parse file, one json path per line.
@@ -140,19 +158,21 @@
                 self.rollup = rollup_from_file
             # Rollup args passed directly at the commandline
             elif len(rollup) == 1 and rollup[0] is True:
-                warn('No fields to rollup found (pass json path directly, as a list in a file, or via a schema)')
+                warn(
+                    "No fields to rollup found (pass json path directly, as a list in a file, or via a schema)"
+                )
             else:
                 self.rollup = set(rollup)
         else:
-            warn('Invalid value passed for rollup (pass json path directly, as a list in a file, or via a schema)')
+            warn(
+                "Invalid value passed for rollup (pass json path directly, as a list in a file, or via a schema)"
+            )
         if self.xml:
-            with codecs.open(json_filename, 'rb') as xml_file:
+            with codecs.open(json_filename, "rb") as xml_file:
                 top_dict = xmltodict.parse(
-                    xml_file,
-                    force_list=(root_list_path,),
-                    force_cdata=True,
-                )
+                    xml_file, force_list=(root_list_path,), force_cdata=True,
+                )
                 # AFAICT, this should be true for *all* XML files
                 assert len(top_dict) == 1
                 root_json_dict = list(top_dict.values())[0]
@@ -160,15 +180,19 @@
                 json_filename = None
         if json_filename is None and root_json_dict is None:
-            raise ValueError('Etiher json_filename or root_json_dict must be supplied')
+            raise ValueError("Either json_filename or root_json_dict must be supplied")
         if json_filename is not None and root_json_dict is not None:
-            raise ValueError('Only one of json_file or root_json_dict should be supplied')
+            raise ValueError(
+                "Only one of json_filename or root_json_dict should be supplied"
+            )
         if json_filename:
-            with codecs.open(json_filename, encoding='utf-8') as json_file:
+            with codecs.open(json_filename, encoding="utf-8") as json_file:
                 try:
-                    self.root_json_dict = json.load(json_file, object_pairs_hook=OrderedDict, parse_float=Decimal)
+                    self.root_json_dict = json.load(
+                        json_file, object_pairs_hook=OrderedDict, parse_float=Decimal
+                    )
                 except UnicodeError as err:
                     raise BadlyFormedJSONErrorUTF8(*err.args)
                 except ValueError as err:
@@ -183,9 +207,11 @@
             with open(preserve_fields) as preserve_fields_file:
                 for line in preserve_fields_file:
                     line = line.strip()
-                    path_fields = line.rsplit('/', 1)
-                    preserve_fields_all = preserve_fields_all + path_fields + [line.rstrip('/')]
-                    preserve_fields_input = preserve_fields_input + [line.rstrip('/')]
+                    path_fields = line.rsplit("/", 1)
+                    preserve_fields_all = (
+                        preserve_fields_all + path_fields + [line.rstrip("/")]
+                    )
+                    preserve_fields_input = preserve_fields_input + [line.rstrip("/")]
             self.preserve_fields = set(preserve_fields_all)
             self.preserve_fields_input = set(preserve_fields_input)
@@ -195,7 +221,11 @@
             for field in self.preserve_fields_input:
                 if field not in self.schema_parser.flattened.keys():
                     input_not_in_schema.add(field)
-                    warn('You wanted to preserve the 
following fields which are not present in the supplied schema: {}'.format(list(input_not_in_schema))) + warn( + "You wanted to preserve the following fields which are not present in the supplied schema: {}".format( + list(input_not_in_schema) + ) + ) except AttributeError: # no schema pass @@ -203,12 +233,13 @@ def __init__(self, json_filename=None, root_json_dict=None, schema_parser=None, self.preserve_fields = None self.preserve_fields_input = None - def parse(self): if self.root_list_path is None: root_json_list = self.root_json_dict else: - root_json_list = path_search(self.root_json_dict, self.root_list_path.split('/')) + root_json_list = path_search( + self.root_json_dict, self.root_list_path.split("/") + ) for json_dict in root_json_list: if json_dict is None: # This is particularly useful for IATI XML, in order to not @@ -228,10 +259,22 @@ def parse(self): if field not in self.seen_paths: nonexistent_input_paths.append(field) if len(nonexistent_input_paths) > 0: - warn('You wanted to preserve the following fields which are not present in the input data: {}'.format(nonexistent_input_paths)) - - - def parse_json_dict(self, json_dict, sheet, json_key=None, parent_name='', flattened_dict=None, parent_id_fields=None, top_level_of_sub_sheet=False): + warn( + "You wanted to preserve the following fields which are not present in the input data: {}".format( + nonexistent_input_paths + ) + ) + + def parse_json_dict( + self, + json_dict, + sheet, + json_key=None, + parent_name="", + flattened_dict=None, + parent_id_fields=None, + top_level_of_sub_sheet=False, + ): """ Parse a json dictionary. @@ -254,7 +297,7 @@ def parse_json_dict(self, json_dict, sheet, json_key=None, parent_name='', flatt else: top = False - if parent_name == '' and self.filter_field and self.filter_value: + if parent_name == "" and self.filter_field and self.filter_value: if self.filter_field not in json_dict: return if json_dict[self.filter_field] != self.filter_value: @@ -264,7 +307,7 @@ def parse_json_dict(self, json_dict, sheet, json_key=None, parent_name='', flatt # Add the IDs for the top level of object in an array for k, v in parent_id_fields.items(): if self.xml: - flattened_dict[sheet_key(sheet, k)] = v['#text'] + flattened_dict[sheet_key(sheet, k)] = v["#text"] else: flattened_dict[sheet_key(sheet, k)] = v @@ -272,12 +315,14 @@ def parse_json_dict(self, json_dict, sheet, json_key=None, parent_name='', flatt parent_id_fields[sheet_key(sheet, self.root_id)] = json_dict[self.root_id] if self.id_name in json_dict: - parent_id_fields[sheet_key(sheet, parent_name+self.id_name)] = json_dict[self.id_name] + parent_id_fields[sheet_key(sheet, parent_name + self.id_name)] = json_dict[ + self.id_name + ] for key, value in json_dict.items(): # Keep a unique list of all the JSON paths in the data that have been seen. 
- parent_path = parent_name.replace('/0', '') + parent_path = parent_name.replace("/0", "") full_path = parent_path + key self.seen_paths.add(full_path) @@ -291,81 +336,144 @@ def parse_json_dict(self, json_dict, sheet, json_key=None, parent_name='', flatt continue if type(value) in BASIC_TYPES: - if self.xml and key == '#text': + if self.xml and key == "#text": # Handle the text output from xmltodict - key = '' - parent_name = parent_name.strip('/') - flattened_dict[sheet_key(sheet, parent_name+key)] = value - elif hasattr(value, 'items'): + key = "" + parent_name = parent_name.strip("/") + flattened_dict[sheet_key(sheet, parent_name + key)] = value + elif hasattr(value, "items"): self.parse_json_dict( value, sheet=sheet, json_key=key, - parent_name=parent_name+key+'/', + parent_name=parent_name + key + "/", flattened_dict=flattened_dict, - parent_id_fields=parent_id_fields) - elif hasattr(value, '__iter__'): + parent_id_fields=parent_id_fields, + ) + elif hasattr(value, "__iter__"): if all(type(x) in BASIC_TYPES for x in value): # Check for an array of BASIC types # TODO Make this check the schema # TODO Error if the any of the values contain the seperator # TODO Support doubly nested arrays - flattened_dict[sheet_key(sheet, parent_name+key)] = ';'.join(map(str, value)) + flattened_dict[sheet_key(sheet, parent_name + key)] = ";".join( + map(str, value) + ) else: - if self.rollup and parent_name == '': # Rollup only currently possible to main sheet - + if ( + self.rollup and parent_name == "" + ): # Rollup only currently possible to main sheet + if self.use_titles and not self.schema_parser: - warn('Warning: No schema was provided so column headings are JSON keys, not titles.') + warn( + "Warning: No schema was provided so column headings are JSON keys, not titles." 
+ ) if len(value) == 1: for k, v in value[0].items(): - if self.preserve_fields and parent_name+key+'/'+k not in self.preserve_fields: + if ( + self.preserve_fields + and parent_name + key + "/" + k + not in self.preserve_fields + ): continue if type(v) not in BASIC_TYPES: - raise ValueError('Rolled up values must be basic types') + raise ValueError( + "Rolled up values must be basic types" + ) else: if self.schema_parser: # We want titles and there's a schema and rollUp is in it - if self.use_titles and \ - parent_name+key+'/0/'+k in self.schema_parser.main_sheet.titles: - flattened_dict[sheet_key_title(sheet, parent_name+key+'/0/'+k)] = v - + if ( + self.use_titles + and parent_name + key + "/0/" + k + in self.schema_parser.main_sheet.titles + ): + flattened_dict[ + sheet_key_title( + sheet, parent_name + key + "/0/" + k + ) + ] = v + # We want titles and there's a schema but rollUp isn't in it # so the titles for rollup properties aren't in the main sheet # so we need to try to get the titles from a subsheet - elif self.use_titles and parent_name+key in self.rollup and \ - parent_name+key in self.schema_parser.sub_sheets: - relevant_subsheet = self.schema_parser.sub_sheets.get(parent_name+key) + elif ( + self.use_titles + and parent_name + key in self.rollup + and parent_name + key + in self.schema_parser.sub_sheets + ): + relevant_subsheet = self.schema_parser.sub_sheets.get( + parent_name + key + ) if relevant_subsheet is not None: - rollup_field_title = sheet_key_title(relevant_subsheet, parent_name+key+'/0/'+k) - flattened_dict[sheet_key(sheet, rollup_field_title)] = v - + rollup_field_title = sheet_key_title( + relevant_subsheet, + parent_name + key + "/0/" + k, + ) + flattened_dict[ + sheet_key(sheet, rollup_field_title) + ] = v + # We don't want titles even though there's a schema - elif not self.use_titles and \ - (parent_name+key+'/0/'+k in self.schema_parser.main_sheet or \ - parent_name+key in self.rollup): - flattened_dict[sheet_key(sheet, parent_name+key+'/0/'+k)] = v + elif not self.use_titles and ( + parent_name + key + "/0/" + k + in self.schema_parser.main_sheet + or parent_name + key in self.rollup + ): + flattened_dict[ + sheet_key( + sheet, parent_name + key + "/0/" + k + ) + ] = v # No schema, so no titles - elif parent_name+key in self.rollup: - flattened_dict[sheet_key(sheet, parent_name+key+'/0/'+k)] = v - + elif parent_name + key in self.rollup: + flattened_dict[ + sheet_key( + sheet, parent_name + key + "/0/" + k + ) + ] = v + elif len(value) > 1: for k in set(sum((list(x.keys()) for x in value), [])): - if self.preserve_fields and parent_name+key+'/'+k not in self.preserve_fields: + if ( + self.preserve_fields + and parent_name + key + "/" + k + not in self.preserve_fields + ): continue - if self.schema_parser and parent_name+key+'/0/'+k in self.schema_parser.main_sheet: - warn('More than one value supplied for "{}". Could not provide rollup, so adding a warning to the relevant cell(s) in the spreadsheet.'.format(parent_name+key)) - flattened_dict[sheet_key(sheet, parent_name+key+'/0/'+k)] = 'WARNING: More than one value supplied, consult the relevant sub-sheet for the data.' - elif parent_name+key in self.rollup: - warn('More than one value supplied for "{}". Could not provide rollup, so adding a warning to the relevant cell(s) in the spreadsheet.'.format(parent_name+key)) - flattened_dict[sheet_key(sheet, parent_name+key+'/0/'+k)] = 'WARNING: More than one value supplied, consult the relevant sub-sheet for the data.' 
- - sub_sheet_name = make_sub_sheet_name(parent_name, key, truncation_length=self.truncation_length) + if ( + self.schema_parser + and parent_name + key + "/0/" + k + in self.schema_parser.main_sheet + ): + warn( + 'More than one value supplied for "{}". Could not provide rollup, so adding a warning to the relevant cell(s) in the spreadsheet.'.format( + parent_name + key + ) + ) + flattened_dict[ + sheet_key(sheet, parent_name + key + "/0/" + k) + ] = "WARNING: More than one value supplied, consult the relevant sub-sheet for the data." + elif parent_name + key in self.rollup: + warn( + 'More than one value supplied for "{}". Could not provide rollup, so adding a warning to the relevant cell(s) in the spreadsheet.'.format( + parent_name + key + ) + ) + flattened_dict[ + sheet_key(sheet, parent_name + key + "/0/" + k) + ] = "WARNING: More than one value supplied, consult the relevant sub-sheet for the data." + + sub_sheet_name = make_sub_sheet_name( + parent_name, key, truncation_length=self.truncation_length + ) if sub_sheet_name not in self.sub_sheets: self.sub_sheets[sub_sheet_name] = Sheet(name=sub_sheet_name) @@ -377,10 +485,11 @@ def parse_json_dict(self, json_dict, sheet, json_key=None, parent_name='', flatt sheet=self.sub_sheets[sub_sheet_name], json_key=key, parent_id_fields=parent_id_fields, - parent_name=parent_name+key+'/0/', - top_level_of_sub_sheet=True) + parent_name=parent_name + key + "/0/", + top_level_of_sub_sheet=True, + ) else: - raise ValueError('Unsupported type {}'.format(type(value))) + raise ValueError("Unsupported type {}".format(type(value))) if top: sheet.lines.append(flattened_dict) diff --git a/flattentool/lib.py b/flattentool/lib.py index 3429718e..63fc3ab8 100644 --- a/flattentool/lib.py +++ b/flattentool/lib.py @@ -5,22 +5,23 @@ def isint(string): except ValueError: return False + def parse_sheet_configuration(configuration_list): configuration = {} for item in configuration_list: parts = item.split() - if (len(parts) == 2 and parts[0].lower() == "skiprows" and isint(parts[1])): - configuration['skipRows'] = max(int(parts[1]), 0) - if (len(parts) == 2 and parts[0].lower() == "headerrows" and isint(parts[1])): - configuration['headerRows'] = max(int(parts[1]), 1) - if (len(parts) == 1 and parts[0].lower() == "ignore"): - configuration['ignore'] = True - if (len(parts) == 1 and parts[0].lower() in ("hashcomments", "hashcomment")): - configuration['hashcomments'] = True - if (len(parts) == 2 and parts[0].lower() == "xmlroottag"): - configuration['XMLRootTag'] = parts[1] - if (len(parts) == 2 and parts[0].lower() == "rootlistpath"): - configuration['RootListPath'] = parts[1] - if (len(parts) == 2 and parts[0].lower() == "idname"): - configuration['IDName'] = parts[1] + if len(parts) == 2 and parts[0].lower() == "skiprows" and isint(parts[1]): + configuration["skipRows"] = max(int(parts[1]), 0) + if len(parts) == 2 and parts[0].lower() == "headerrows" and isint(parts[1]): + configuration["headerRows"] = max(int(parts[1]), 1) + if len(parts) == 1 and parts[0].lower() == "ignore": + configuration["ignore"] = True + if len(parts) == 1 and parts[0].lower() in ("hashcomments", "hashcomment"): + configuration["hashcomments"] = True + if len(parts) == 2 and parts[0].lower() == "xmlroottag": + configuration["XMLRootTag"] = parts[1] + if len(parts) == 2 and parts[0].lower() == "rootlistpath": + configuration["RootListPath"] = parts[1] + if len(parts) == 2 and parts[0].lower() == "idname": + configuration["IDName"] = parts[1] return configuration diff --git 
a/flattentool/output.py b/flattentool/output.py index b9c05038..32bc874b 100644 --- a/flattentool/output.py +++ b/flattentool/output.py @@ -4,22 +4,25 @@ """ -import openpyxl -from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE import csv import os from warnings import warn -from flattentool.exceptions import DataErrorWarning -from odf.opendocument import OpenDocumentSpreadsheet import odf.table import odf.text +import openpyxl +from odf.opendocument import OpenDocumentSpreadsheet +from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE + +from flattentool.exceptions import DataErrorWarning class SpreadsheetOutput(object): # output_name is given a default here, partly to help with tests, # but should have been defined by the time we get here. - def __init__(self, parser, main_sheet_name='main', output_name='unflattened', sheet_prefix=''): + def __init__( + self, parser, main_sheet_name="main", output_name="unflattened", sheet_prefix="" + ): self.parser = parser self.main_sheet_name = main_sheet_name self.output_name = output_name @@ -58,10 +61,14 @@ def write_sheet(self, sheet_name, sheet): for header in sheet_header: value = sheet_line.get(header) if isinstance(value, str): - new_value = ILLEGAL_CHARACTERS_RE.sub('', value) + new_value = ILLEGAL_CHARACTERS_RE.sub("", value) if new_value != value: - warn("Character(s) in '{}' are not allowed in a spreadsheet cell. Those character(s) will be removed".format(value), - DataErrorWarning) + warn( + "Character(s) in '{}' are not allowed in a spreadsheet cell. Those character(s) will be removed".format( + value + ), + DataErrorWarning, + ) value = new_value line.append(value) worksheet.append(line) @@ -80,7 +87,11 @@ def open(self): def write_sheet(self, sheet_name, sheet): sheet_header = list(sheet) - with open(os.path.join(self.output_name, self.sheet_prefix + sheet_name+'.csv'), 'w', encoding='utf-8') as csv_file: + with open( + os.path.join(self.output_name, self.sheet_prefix + sheet_name + ".csv"), + "w", + encoding="utf-8", + ) as csv_file: dictwriter = csv.DictWriter(csv_file, sheet_header) dictwriter.writeheader() for sheet_line in sheet.lines: @@ -97,8 +108,7 @@ def _make_cell(self, value): if value: try: # See if value parses as a float - cell = odf.table.TableCell(valuetype="float", - value=float(value)) + cell = odf.table.TableCell(valuetype="float", value=float(value)) except ValueError: cell = odf.table.TableCell(valuetype="string") else: @@ -126,10 +136,14 @@ def write_sheet(self, sheet_name, sheet): for header in sheet_header: value = sheet_line.get(header) if isinstance(value, str): - new_value = ILLEGAL_CHARACTERS_RE.sub('', value) + new_value = ILLEGAL_CHARACTERS_RE.sub("", value) if new_value != value: - warn("Character(s) in '{}' are not allowed in a spreadsheet cell. Those character(s) will be removed".format(value), - DataErrorWarning) + warn( + "Character(s) in '{}' are not allowed in a spreadsheet cell. 
Those character(s) will be removed".format( + value + ), + DataErrorWarning, + ) value = new_value row.addElement(self._make_cell(value)) worksheet.addElement(row) @@ -140,14 +154,10 @@ def close(self): self.workbook.save(self.output_name) -FORMATS = { - 'xlsx': XLSXOutput, - 'csv': CSVOutput, - 'ods': ODSOutput -} +FORMATS = {"xlsx": XLSXOutput, "csv": CSVOutput, "ods": ODSOutput} FORMATS_SUFFIX = { - 'xlsx': '.xlsx', - 'ods': '.ods', - 'csv': '' # This is the suffix for the directory + "xlsx": ".xlsx", + "ods": ".ods", + "csv": "", # This is the suffix for the directory } diff --git a/flattentool/schema.py b/flattentool/schema.py index 21a5aea7..c0ce12ee 100644 --- a/flattentool/schema.py +++ b/flattentool/schema.py @@ -1,14 +1,17 @@ """Classes for reading from a JSON schema""" -from __future__ import print_function -from __future__ import unicode_literals -from collections import OrderedDict, UserDict -import jsonref -from warnings import warn -from flattentool.sheet import Sheet +from __future__ import print_function, unicode_literals + import codecs import os import sys +from collections import OrderedDict, UserDict +from warnings import warn + +import jsonref + +from flattentool.sheet import Sheet + if sys.version_info[:2] > (3, 0): import pathlib else: @@ -16,7 +19,7 @@ def get_property_type_set(property_schema_dict): - property_type = property_schema_dict.get('type', []) + property_type = property_schema_dict.get("type", []) if not isinstance(property_type, list): return set([property_type]) else: @@ -24,8 +27,10 @@ def get_property_type_set(property_schema_dict): def make_sub_sheet_name(parent_path, property_name, truncation_length=3): - return ('_'.join(x[:truncation_length] for x in parent_path.split('/') if x != '0') + property_name)[:31] - + return ( + "_".join(x[:truncation_length] for x in parent_path.split("/") if x != "0") + + property_name + )[:31] class TitleLookup(UserDict): @@ -33,7 +38,7 @@ class TitleLookup(UserDict): def lookup_header(self, title_header): if type(title_header) == str: - return self.lookup_header_list(title_header.split(':')) + return self.lookup_header_list(title_header.split(":")) else: return title_header @@ -42,56 +47,76 @@ def lookup_header_list(self, title_header_list): remaining_titles = title_header_list[1:] try: int(first_title) - return first_title + ('/' + self.lookup_header_list(remaining_titles) if remaining_titles else '') + return first_title + ( + "/" + self.lookup_header_list(remaining_titles) + if remaining_titles + else "" + ) except ValueError: pass if first_title in self: if remaining_titles: - return self[first_title].property_name + '/' + self[first_title].lookup_header_list(remaining_titles) + return ( + self[first_title].property_name + + "/" + + self[first_title].lookup_header_list(remaining_titles) + ) else: return self[first_title].property_name else: # If we can't look up the title, treat it and any children as # field names directly. # Strip spaces off these. 
- return '/'.join(x.strip(' ') for x in title_header_list) + return "/".join(x.strip(" ") for x in title_header_list) def __setitem__(self, key, value): - self.data[key.replace(' ', '').lower()] = value + self.data[key.replace(" ", "").lower()] = value def __getitem__(self, key): if key is None: raise KeyError else: - return self.data[key.replace(' ', '').lower()] + return self.data[key.replace(" ", "").lower()] def __contains__(self, key): if key is None: return False else: - return key.replace(' ', '').lower() in self.data + return key.replace(" ", "").lower() in self.data class JsonLoaderLocalRefUsedWhenLocalRefsDisabled(Exception): pass + class JsonLoaderLocalRefsDisabled(jsonref.JsonLoader): def __call__(self, uri, **kwargs): if self.is_ref_local(uri): - raise JsonLoaderLocalRefUsedWhenLocalRefsDisabled("Local Ref Used When Local Refs Disabled: " + uri) + raise JsonLoaderLocalRefUsedWhenLocalRefsDisabled( + "Local Ref Used When Local Refs Disabled: " + uri + ) else: return super(JsonLoaderLocalRefsDisabled, self).__call__(uri, **kwargs) def is_ref_local(self, uri): - return uri[:7].lower() != 'http://' and uri[:8].lower() != 'https://' + return uri[:7].lower() != "http://" and uri[:8].lower() != "https://" class SchemaParser(object): """Parse the fields of a JSON schema into a flattened structure.""" - def __init__(self, schema_filename=None, root_schema_dict=None, rollup=False, root_id=None, use_titles=False, - disable_local_refs=False, truncation_length=3, exclude_deprecated_fields=False): + def __init__( + self, + schema_filename=None, + root_schema_dict=None, + rollup=False, + root_id=None, + use_titles=False, + disable_local_refs=False, + truncation_length=3, + exclude_deprecated_fields=False, + ): self.sub_sheets = {} self.main_sheet = Sheet() self.sub_sheet_mapping = {} @@ -104,127 +129,174 @@ def __init__(self, schema_filename=None, root_schema_dict=None, rollup=False, ro self.flattened = {} self.exclude_deprecated_fields = exclude_deprecated_fields - if root_schema_dict is None and schema_filename is None: - raise ValueError('One of schema_filename or root_schema_dict must be supplied') + if root_schema_dict is None and schema_filename is None: + raise ValueError( + "One of schema_filename or root_schema_dict must be supplied" + ) if root_schema_dict is not None and schema_filename is not None: - raise ValueError('Only one of schema_filename or root_schema_dict should be supplied') + raise ValueError( + "Only one of schema_filename or root_schema_dict should be supplied" + ) if schema_filename: - if schema_filename.startswith('http'): + if schema_filename.startswith("http"): import requests + r = requests.get(schema_filename) - self.root_schema_dict = jsonref.loads(r.text, object_pairs_hook=OrderedDict) + self.root_schema_dict = jsonref.loads( + r.text, object_pairs_hook=OrderedDict + ) else: if disable_local_refs: with codecs.open(schema_filename, encoding="utf-8") as schema_file: - self.root_schema_dict = jsonref.load(schema_file, object_pairs_hook=OrderedDict, - loader=JsonLoaderLocalRefsDisabled()) + self.root_schema_dict = jsonref.load( + schema_file, + object_pairs_hook=OrderedDict, + loader=JsonLoaderLocalRefsDisabled(), + ) else: if sys.version_info[:2] > (3, 0): - base_uri = pathlib.Path(os.path.realpath(schema_filename)).as_uri() + base_uri = pathlib.Path( + os.path.realpath(schema_filename) + ).as_uri() else: - base_uri = urlparse.urljoin('file:', urllib.pathname2url(os.path.abspath(schema_filename))) + base_uri = urlparse.urljoin( + "file:", + 
urllib.pathname2url(os.path.abspath(schema_filename)), + ) with codecs.open(schema_filename, encoding="utf-8") as schema_file: - self.root_schema_dict = jsonref.load(schema_file, object_pairs_hook=OrderedDict, - base_uri=base_uri) - + self.root_schema_dict = jsonref.load( + schema_file, + object_pairs_hook=OrderedDict, + base_uri=base_uri, + ) else: self.root_schema_dict = root_schema_dict def parse(self): - fields = self.parse_schema_dict('', self.root_schema_dict) + fields = self.parse_schema_dict("", self.root_schema_dict) for field, title in fields: if self.use_titles: if not title: - warn('Field {} does not have a title, skipping.'.format(field)) + warn("Field {} does not have a title, skipping.".format(field)) else: self.main_sheet.append(title) self.main_sheet.titles[field] = title else: self.main_sheet.append(field) - def parse_schema_dict(self, parent_path, schema_dict, parent_id_fields=None, title_lookup=None, parent_title=''): + def parse_schema_dict( + self, + parent_path, + schema_dict, + parent_id_fields=None, + title_lookup=None, + parent_title="", + ): if parent_path: - parent_path = parent_path + '/' + parent_path = parent_path + "/" parent_id_fields = parent_id_fields or [] title_lookup = self.title_lookup if title_lookup is None else title_lookup - if 'type' in schema_dict and schema_dict['type'] == 'array' \ - and 'items' in schema_dict and 'oneOf' in schema_dict['items']: - for oneOf in schema_dict['items']['oneOf']: - if 'type' in oneOf and oneOf['type'] == 'object': + if ( + "type" in schema_dict + and schema_dict["type"] == "array" + and "items" in schema_dict + and "oneOf" in schema_dict["items"] + ): + for oneOf in schema_dict["items"]["oneOf"]: + if "type" in oneOf and oneOf["type"] == "object": for field, child_title in self.parse_schema_dict( - parent_path, - oneOf, - parent_id_fields=parent_id_fields, - title_lookup=title_lookup, - parent_title=parent_title): - yield ( - field, - child_title - ) - - elif 'properties' in schema_dict: - if 'id' in schema_dict['properties']: + parent_path, + oneOf, + parent_id_fields=parent_id_fields, + title_lookup=title_lookup, + parent_title=parent_title, + ): + yield (field, child_title) + + elif "properties" in schema_dict: + if "id" in schema_dict["properties"]: if self.use_titles: - id_fields = parent_id_fields + [(parent_title if parent_title is not None else parent_path)+(schema_dict['properties']['id'].get('title') or 'id')] + id_fields = parent_id_fields + [ + (parent_title if parent_title is not None else parent_path) + + (schema_dict["properties"]["id"].get("title") or "id") + ] else: - id_fields = parent_id_fields + [parent_path+'id'] + id_fields = parent_id_fields + [parent_path + "id"] else: id_fields = parent_id_fields - for property_name, property_schema_dict in schema_dict['properties'].items(): - if self.exclude_deprecated_fields and property_schema_dict.get('deprecated'): + for property_name, property_schema_dict in schema_dict[ + "properties" + ].items(): + if self.exclude_deprecated_fields and property_schema_dict.get( + "deprecated" + ): continue property_type_set = get_property_type_set(property_schema_dict) - title = property_schema_dict.get('title') + title = property_schema_dict.get("title") if title: title_lookup[title] = TitleLookup() title_lookup[title].property_name = property_name - if 'object' in property_type_set: - self.flattened[parent_path+property_name] = "object" + if "object" in property_type_set: + self.flattened[parent_path + property_name] = "object" for field, child_title in 
self.parse_schema_dict( - parent_path+property_name, - property_schema_dict, - parent_id_fields=id_fields, - title_lookup=title_lookup.get(title), - parent_title=parent_title+title+':' if parent_title is not None and title else None): + parent_path + property_name, + property_schema_dict, + parent_id_fields=id_fields, + title_lookup=title_lookup.get(title), + parent_title=parent_title + title + ":" + if parent_title is not None and title + else None, + ): yield ( - property_name+'/'+field, + property_name + "/" + field, # TODO ambiguous use of "title" - (title+':'+child_title if title and child_title else None) + ( + title + ":" + child_title + if title and child_title + else None + ), ) - elif 'array' in property_type_set: - flattened_key = parent_path.replace('/0/', '/')+property_name + elif "array" in property_type_set: + flattened_key = parent_path.replace("/0/", "/") + property_name self.flattened[flattened_key] = "array" - type_set = get_property_type_set(property_schema_dict['items']) - if 'string' in type_set or not type_set: + type_set = get_property_type_set(property_schema_dict["items"]) + if "string" in type_set or not type_set: self.flattened[flattened_key] = "string_array" yield property_name, title - elif 'number' in type_set: + elif "number" in type_set: self.flattened[flattened_key] = "number_array" yield property_name, title - elif 'array' in type_set: + elif "array" in type_set: self.flattened[flattened_key] = "array_array" - nested_type_set = get_property_type_set(property_schema_dict['items']['items']) - if 'string' in nested_type_set or 'number' in nested_type_set: + nested_type_set = get_property_type_set( + property_schema_dict["items"]["items"] + ) + if "string" in nested_type_set or "number" in nested_type_set: yield property_name, title else: raise ValueError - elif 'object' in type_set: + elif "object" in type_set: if title: title_lookup[title].property_name = property_name - sub_sheet_name = make_sub_sheet_name(parent_path, property_name, - truncation_length=self.truncation_length) - #self.sub_sheet_mapping[parent_name+'/'+property_name] = sub_sheet_name + sub_sheet_name = make_sub_sheet_name( + parent_path, + property_name, + truncation_length=self.truncation_length, + ) + # self.sub_sheet_mapping[parent_name+'/'+property_name] = sub_sheet_name if sub_sheet_name not in self.sub_sheets: - self.sub_sheets[sub_sheet_name] = Sheet(root_id=self.root_id, name=sub_sheet_name) + self.sub_sheets[sub_sheet_name] = Sheet( + root_id=self.root_id, name=sub_sheet_name + ) sub_sheet = self.sub_sheets[sub_sheet_name] sub_sheet.title_lookup = title_lookup.get(title) @@ -232,58 +304,100 @@ def parse_schema_dict(self, parent_path, schema_dict, parent_id_fields=None, tit sub_sheet.add_field(field, id_field=True) sub_sheet.titles[title_lookup.lookup_header(field)] = field fields = self.parse_schema_dict( - parent_path+property_name+'/0', - property_schema_dict['items'], - parent_id_fields=id_fields, - title_lookup=title_lookup.get(title), - parent_title=parent_title+title+':' if parent_title is not None and title else None) - + parent_path + property_name + "/0", + property_schema_dict["items"], + parent_id_fields=id_fields, + title_lookup=title_lookup.get(title), + parent_title=parent_title + title + ":" + if parent_title is not None and title + else None, + ) + rollup_fields = set() for field, child_title in fields: - full_path = parent_path+property_name+'/0/'+field + full_path = parent_path + property_name + "/0/" + field if self.use_titles: if not child_title or parent_title 
is None:
-                            warn('Field {}{}/0/{} is missing a title, skipping.'.format(parent_path, property_name, field))
+                            warn(
+                                "Field {}{}/0/{} is missing a title, skipping.".format(
+                                    parent_path, property_name, field
+                                )
+                            )
                         elif not title:
-                            warn('Field {}{} does not have a title, skipping it and all its children.'.format(parent_path, property_name))
+                            warn(
+                                "Field {}{} does not have a title, skipping it and all its children.".format(
+                                    parent_path, property_name
+                                )
+                            )
                         else:
                             # This code only works for arrays that are at 0 or 1 layer of nesting
-                            full_title = parent_title+title+':'+child_title
+                            full_title = (
+                                parent_title + title + ":" + child_title
+                            )
                             sub_sheet.add_field(full_title)
                             sub_sheet.titles[full_path] = full_title
                     else:
                         sub_sheet.add_field(full_path)
-                    if self.do_rollup and 'rollUp' in property_schema_dict and field in property_schema_dict['rollUp']:
+                    if (
+                        self.do_rollup
+                        and "rollUp" in property_schema_dict
+                        and field in property_schema_dict["rollUp"]
+                    ):
                         rollup_fields.add(field)
                         self.rollup.add(full_path)
-                    yield property_name+'/0/'+field, (title+':'+child_title if title and child_title else None)
+                    yield property_name + "/0/" + field, (
+                        title + ":" + child_title
+                        if title and child_title
+                        else None
+                    )
                 # Check that all items in rollUp are in the schema
-                if self.do_rollup and 'rollUp' in property_schema_dict:
-                    missedRollUp = set(property_schema_dict['rollUp']) - rollup_fields
+                if self.do_rollup and "rollUp" in property_schema_dict:
+                    missedRollUp = (
+                        set(property_schema_dict["rollUp"]) - rollup_fields
+                    )
                     if missedRollUp:
-                        warn('{} in rollUp but not in schema'.format(', '.join(missedRollUp)))
+                        warn(
+                            "{} in rollUp but not in schema".format(
+                                ", ".join(missedRollUp)
+                            )
+                        )
             else:
-                raise ValueError('Unknown type_set: {}, did you forget to explicity set the "type" key on "items"?'.format(type_set))
+                raise ValueError(
+                    'Unknown type_set: {}, did you forget to explicitly set the "type" key on "items"?'.format(
+                        type_set
+                    )
+                )
-        elif 'string' in property_type_set or not property_type_set:
-            self.flattened[parent_path.replace('/0/', '/')+property_name] = "string"
+        elif "string" in property_type_set or not property_type_set:
+            self.flattened[
+                parent_path.replace("/0/", "/") + property_name
+            ] = "string"
             yield property_name, title
-        elif 'number' in property_type_set:
-            self.flattened[parent_path.replace('/0/', '/')+property_name] = "number"
+        elif "number" in property_type_set:
+            self.flattened[
+                parent_path.replace("/0/", "/") + property_name
+            ] = "number"
             yield property_name, title
-        elif 'integer' in property_type_set:
-            self.flattened[parent_path.replace('/0/', '/')+property_name] = "integer"
+        elif "integer" in property_type_set:
+            self.flattened[
+                parent_path.replace("/0/", "/") + property_name
+            ] = "integer"
             yield property_name, title
-        elif 'boolean' in property_type_set:
-            self.flattened[parent_path.replace('/0/', '/')+property_name] = "boolean"
+        elif "boolean" in property_type_set:
+            self.flattened[
+                parent_path.replace("/0/", "/") + property_name
+            ] = "boolean"
             yield property_name, title
         else:
-            warn('Unrecognised types {} for property "{}" with context "{}",'
-                 'so this property has been ignored.'.format(
-                     repr(property_type_set),
-                     property_name,
-                     parent_path))
+            warn(
+                'Unrecognised types {} for property "{}" with context "{}", '
+                "so this property has been ignored.".format(
+                    repr(property_type_set), property_name, parent_path
+                )
+            )
     else:
-        warn('Skipping field "{}", because it has no properties.'.format(parent_path))
+        warn(
+            
'Skipping field "{}", because it has no properties.'.format(parent_path) + ) diff --git a/flattentool/sheet.py b/flattentool/sheet.py index 4fa484a8..05f2159a 100644 --- a/flattentool/sheet.py +++ b/flattentool/sheet.py @@ -4,7 +4,7 @@ class Sheet(object): """ - def __init__(self, columns=None, root_id='', name=None): + def __init__(self, columns=None, root_id="", name=None): self.id_columns = [] self.columns = columns if columns else [] self.titles = {} diff --git a/flattentool/sort_xml.py b/flattentool/sort_xml.py index 5debc6a8..ee8d3257 100644 --- a/flattentool/sort_xml.py +++ b/flattentool/sort_xml.py @@ -26,19 +26,20 @@ """ from collections import OrderedDict from warnings import warn + try: import lxml.etree as ET + # Note that lxml is now "required" - it's listed as a requirement in # setup.py and is needed for the tests to pass. # However, stdlib etree still exists as an unsupported feature. except ImportError: import xml.etree.ElementTree as ET - warn('Using stdlib etree may work, but is not supported. Please install lxml.') + + warn("Using stdlib etree may work, but is not supported. Please install lxml.") # Namespaces necessary for opening schema files -namespaces = { - 'xsd': 'http://www.w3.org/2001/XMLSchema' -} +namespaces = {"xsd": "http://www.w3.org/2001/XMLSchema"} class XMLSchemaWalker(object): @@ -47,6 +48,7 @@ class XMLSchemaWalker(object): Based on the Schema2Doc class in https://github.com/IATI/IATI-Standard-SSOT/blob/version-2.02/gen.py """ + def __init__(self, schemas): """ schema -- the filename of the schema to use, e.g. @@ -64,7 +66,10 @@ def get_schema_element(self, tag_name, name_attribute): e.g. iati-activities """ for tree in self.trees: - schema_element = tree.find("xsd:{0}[@name='{1}']".format(tag_name, name_attribute), namespaces=namespaces) + schema_element = tree.find( + "xsd:{0}[@name='{1}']".format(tag_name, name_attribute), + namespaces=namespaces, + ) if schema_element is not None: return schema_element return schema_element @@ -75,33 +80,38 @@ def element_loop(self, element, path): """ a = element.attrib type_elements = [] - if 'type' in a: - complexType = self.get_schema_element('complexType', a['type']) + if "type" in a: + complexType = self.get_schema_element("complexType", a["type"]) if complexType is not None: - type_elements = ( - complexType.findall('xsd:choice/xsd:element', - namespaces=namespaces) + - complexType.findall('xsd:sequence/xsd:element', - namespaces=namespaces)) + type_elements = complexType.findall( + "xsd:choice/xsd:element", namespaces=namespaces + ) + complexType.findall( + "xsd:sequence/xsd:element", namespaces=namespaces + ) children = ( element.findall( - 'xsd:complexType/xsd:choice/xsd:element', - namespaces=namespaces) + "xsd:complexType/xsd:choice/xsd:element", namespaces=namespaces + ) + element.findall( - 'xsd:complexType/xsd:sequence/xsd:element', - namespaces=namespaces) + "xsd:complexType/xsd:sequence/xsd:element", namespaces=namespaces + ) + element.findall( - 'xsd:complexType/xsd:all/xsd:element', - namespaces=namespaces) - + type_elements) + "xsd:complexType/xsd:all/xsd:element", namespaces=namespaces + ) + + type_elements + ) child_tuples = [] for child in children: a = child.attrib - if 'name' in a: - child_tuples.append((a['name'], child, None, a.get('minOccurs'), a.get('maxOccurs'))) + if "name" in a: + child_tuples.append( + (a["name"], child, None, a.get("minOccurs"), a.get("maxOccurs")) + ) else: - child_tuples.append((a['ref'], None, child, a.get('minOccurs'), a.get('maxOccurs'))) + 
child_tuples.append( + (a["ref"], None, child, a.get("minOccurs"), a.get("maxOccurs")) + ) return child_tuples def create_schema_dict(self, parent_name, parent_element=None): @@ -110,13 +120,16 @@ def create_schema_dict(self, parent_name, parent_element=None): elements in the provided schema. """ if parent_element is None: - parent_element = self.get_schema_element('element', parent_name) + parent_element = self.get_schema_element("element", parent_name) if parent_element is None: return {} - return OrderedDict([ - (name, self.create_schema_dict(name, element)) - for name, element, _, _, _ in self.element_loop(parent_element, '')]) + return OrderedDict( + [ + (name, self.create_schema_dict(name, element)) + for name, element, _, _, _ in self.element_loop(parent_element, "") + ] + ) def sort_element(element, schema_subdict): diff --git a/flattentool/tests/test_cli.py b/flattentool/tests/test_cli.py index b9b39e5b..d3ae8262 100644 --- a/flattentool/tests/test_cli.py +++ b/flattentool/tests/test_cli.py @@ -1,7 +1,9 @@ +# hint: test_argparse is provided by libpythonX.Y-testsuite on ubuntu +from test.test_argparse import ArgumentParserError, stderr_to_parser_error + import pytest + from flattentool import cli -# hint: test_argparse is provided by libpythonX.Y-testsuite on ubuntu -from test.test_argparse import stderr_to_parser_error, ArgumentParserError def test_create_parser(): @@ -9,8 +11,8 @@ def test_create_parser(): Command line arguments that should be acceptable """ parser = cli.create_parser() - args = parser.parse_args('create-template -s schema.json'.split()) - assert args.schema == 'schema.json' + args = parser.parse_args("create-template -s schema.json".split()) + assert args.schema == "schema.json" def test_create_parser_missing_required_options(): @@ -21,5 +23,5 @@ def test_create_parser_missing_required_options(): parser = cli.create_parser() with pytest.raises(ArgumentParserError) as excinfo: - stderr_to_parser_error(parser.parse_args, 'create-template'.split()) - assert 'required' in excinfo.value.stderr + stderr_to_parser_error(parser.parse_args, "create-template".split()) + assert "required" in excinfo.value.stderr diff --git a/flattentool/tests/test_docs.py b/flattentool/tests/test_docs.py index c8e303c1..1267eb56 100644 --- a/flattentool/tests/test_docs.py +++ b/flattentool/tests/test_docs.py @@ -3,112 +3,133 @@ import subprocess import sys import uuid -import pytest - -from os.path import join, getsize +from os.path import join +import pytest examples_in_docs_data = [] + def _get_examples_in_docs_data(): global examples_in_docs_data examples_in_docs_data = [] - for root, dirs, files in os.walk('examples'): + for root, dirs, files in os.walk("examples"): for filename in files: - if 'xlsx' in root and sys.version_info[:2] < (3,4): + if "xlsx" in root and sys.version_info[:2] < (3, 4): continue - if 'cmd.txt' in filename: + if "cmd.txt" in filename: examples_in_docs_data.append((root, filename)) + _get_examples_in_docs_data() def test_examples_receipt(): - with open('examples/receipt/source-map/expected.json', 'rb') as fp: + with open("examples/receipt/source-map/expected.json", "rb") as fp: expected = fp.read() for expected_filename in [ - 'normalised/expected.json', - 'combine-table-into-cafe/expected.json', - 'combine-table-into-cafe-2/expected.json', + "normalised/expected.json", + "combine-table-into-cafe/expected.json", + "combine-table-into-cafe-2/expected.json", ]: - with open('examples/receipt/' + expected_filename, 'rb') as fp2: - assert fp2.read() == expected, 
"Files differ: examples/receipt/source-map/expected.json, examples/receipt/{}".format( - expected_filename) + with open("examples/receipt/" + expected_filename, "rb") as fp2: + assert ( + fp2.read() == expected + ), "Files differ: examples/receipt/source-map/expected.json, examples/receipt/{}".format( + expected_filename + ) @pytest.mark.parametrize("root, filename", examples_in_docs_data) def test_example_in_doc(root, filename): - if os.path.exists(join(root, 'actual')) and os.path.isdir(join(root, 'actual')): - os.rename(join(root, 'actual'), join(root, 'actual.'+str(uuid.uuid4()))) - os.mkdir(join(root, 'actual')) - expected_return_code = 0 - expected_stdout = b'' - if os.path.exists(join(root, 'expected_return_code.txt')): - with open(join(root, 'expected_return_code.txt'), 'rb') as fp: - expected_return_code = int(fp.read().strip()) - with open(join(root, filename), 'rb') as fp: - cmds = str(fp.read(), 'utf8').strip().split('\n') - actual_stdout = b'' - actual_stderr = b'' - for cmd in cmds: - assert ( - cmd.startswith('$ flatten-tool ') or cmd.startswith('$ cat ') - ), "Expected commands to start with '$ flatten-tool'. This doesn't: {}".format(cmd) - # Since we are defining all the commands ourselves, this is reasonably safe - cmd_parts = shlex.split(cmd[len('$ '):]) - # Include coverage output in the results - if cmd_parts[0] == 'flatten-tool': - cmd_parts = [ - 'coverage', - 'run', - '--source', 'flattentool', - '--parallel-mode', - ] + cmd_parts - process = subprocess.Popen(cmd_parts, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (cmd_actual_stdout, cmd_actual_stderr) = process.communicate() - process.wait() - assert process.returncode == expected_return_code, cmd - actual_stdout += (cmd_actual_stdout or b'') - actual_stderr += (cmd_actual_stderr or b'') - if os.path.exists(join(root, 'expected')) and os.path.isdir(join(root, 'expected')): - # Create case - assert len(os.listdir(join(root, 'expected'))) == len(os.listdir(join(root, 'actual'))), "Different number of files. {}".format(cmds) - for expected_filename in os.listdir(join(root, 'expected')): - assert os.path.exists(join(root, 'actual', expected_filename)), "File {} was not generated {}".format(expected_filename, cmds) - with open(join(root, 'expected', expected_filename), 'rb') as fp_expected: - with open(join(root, 'actual', expected_filename), 'rb') as fp_actual: - assert _strip(fp_actual.read()) == _strip(fp_expected.read()), "File {} has unexpected content. {}".format(expected_filename, cmds) - expected_stdout = b'' - # Flatten case - if os.path.exists(join(root, 'expected.txt')): - with open(join(root, 'expected.txt'), 'rb') as fstdout: - expected_stdout = fstdout.read() - elif os.path.exists(join(root, 'expected.json')): - with open(join(root, 'expected.json'), 'rb') as fstdout: - expected_stdout = fstdout.read() - elif os.path.exists(join(root, 'expected.xml')): - with open(join(root, 'expected.xml'), 'rb') as fstdout: - expected_stdout = fstdout.read() - if 'help' in root: - # Ignore whitespace differences for help messages - assert b' '.join(actual_stdout.split()) == b' '.join(expected_stdout.split()) + if os.path.exists(join(root, "actual")) and os.path.isdir(join(root, "actual")): + os.rename(join(root, "actual"), join(root, "actual." 
+ str(uuid.uuid4()))) + os.mkdir(join(root, "actual")) + expected_return_code = 0 + expected_stdout = b"" + if os.path.exists(join(root, "expected_return_code.txt")): + with open(join(root, "expected_return_code.txt"), "rb") as fp: + expected_return_code = int(fp.read().strip()) + with open(join(root, filename), "rb") as fp: + cmds = str(fp.read(), "utf8").strip().split("\n") + actual_stdout = b"" + actual_stderr = b"" + for cmd in cmds: + assert cmd.startswith("$ flatten-tool ") or cmd.startswith( + "$ cat " + ), "Expected commands to start with '$ flatten-tool'. This doesn't: {}".format( + cmd + ) + # Since we are defining all the commands ourselves, this is reasonably safe + cmd_parts = shlex.split(cmd[len("$ ") :]) + # Include coverage output in the results + if cmd_parts[0] == "flatten-tool": + cmd_parts = [ + "coverage", + "run", + "--source", + "flattentool", + "--parallel-mode", + ] + cmd_parts + process = subprocess.Popen( + cmd_parts, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + (cmd_actual_stdout, cmd_actual_stderr) = process.communicate() + process.wait() + assert process.returncode == expected_return_code, cmd + actual_stdout += cmd_actual_stdout or b"" + actual_stderr += cmd_actual_stderr or b"" + if os.path.exists(join(root, "expected")) and os.path.isdir(join(root, "expected")): + # Create case + assert len(os.listdir(join(root, "expected"))) == len( + os.listdir(join(root, "actual")) + ), "Different number of files. {}".format(cmds) + for expected_filename in os.listdir(join(root, "expected")): + assert os.path.exists( + join(root, "actual", expected_filename) + ), "File {} was not generated {}".format(expected_filename, cmds) + with open(join(root, "expected", expected_filename), "rb") as fp_expected: + with open(join(root, "actual", expected_filename), "rb") as fp_actual: + assert _strip(fp_actual.read()) == _strip( + fp_expected.read() + ), "File {} has unexpected content. 
{}".format( + expected_filename, cmds + ) + expected_stdout = b"" + # Flatten case + if os.path.exists(join(root, "expected.txt")): + with open(join(root, "expected.txt"), "rb") as fstdout: + expected_stdout = fstdout.read() + elif os.path.exists(join(root, "expected.json")): + with open(join(root, "expected.json"), "rb") as fstdout: + expected_stdout = fstdout.read() + elif os.path.exists(join(root, "expected.xml")): + with open(join(root, "expected.xml"), "rb") as fstdout: + expected_stdout = fstdout.read() + if "help" in root: + # Ignore whitespace differences for help messages + assert b" ".join(actual_stdout.split()) == b" ".join(expected_stdout.split()) + else: + assert _strip(actual_stdout) == _strip( + expected_stdout + ), "Different stdout: {}".format(cmds) + expected_stderr = b"" + if os.path.exists(join(root, "expected_stderr_partial.txt")): + with open(join(root, "expected_stderr_partial.txt"), "rb") as fstderr: + data = fstderr.read() + assert data in actual_stderr + if os.path.exists(join(root, "expected_stderr.json")): + with open(join(root, "expected_stderr.json"), "rb") as fstderr: + data = fstderr.read() + expected_stderr_lines = str(data, "utf8").split("\n") + for line in expected_stderr_lines: + if line: + expected_stderr += (line + "\n").encode("utf8") else: - assert _strip(actual_stdout) == _strip(expected_stdout), "Different stdout: {}".format(cmds) - expected_stderr = b'' - if os.path.exists(join(root, 'expected_stderr_partial.txt')): - with open(join(root, 'expected_stderr_partial.txt'), 'rb') as fstderr: - data = fstderr.read() - assert data in actual_stderr - if os.path.exists(join(root, 'expected_stderr.json')): - with open(join(root, 'expected_stderr.json'), 'rb') as fstderr: - data = fstderr.read() - expected_stderr_lines = str(data, 'utf8').split('\n') - for line in expected_stderr_lines: - if line: - expected_stderr += (line + '\n').encode('utf8') - else: - expected_stderr += b'\n' - assert _simplify_warnings(_strip(actual_stderr)) == _simplify_warnings(_strip(expected_stderr)), "Different stderr: {}".format(cmds) + expected_stderr += b"\n" + assert _simplify_warnings(_strip(actual_stderr)) == _simplify_warnings( + _strip(expected_stderr) + ), "Different stderr: {}".format(cmds) def test_expected_number_of_examples_in_docs_data(): @@ -116,23 +137,25 @@ def test_expected_number_of_examples_in_docs_data(): def _simplify_warnings(lines): - return '\n'.join([_simplify_line(line) for line in lines.split('\n')]) + return "\n".join([_simplify_line(line) for line in lines.split("\n")]) + def _simplify_line(line): - if 'DataErrorWarning: ' in line: - return line[line.find('DataErrorWarning: '):] + if "DataErrorWarning: " in line: + return line[line.find("DataErrorWarning: ") :] return line + # Older versions of Python have an extra whitespace at the end compared to newer ones # https://bugs.python.org/issue16333 def _strip(output): # Don't worry about any extra blank lines at the end either - outstr = str(output, 'utf8').rstrip('\n') - return '\n'.join(line.rstrip(' ') for line in outstr.split('\n')) + outstr = str(output, "utf8").rstrip("\n") + return "\n".join(line.rstrip(" ") for line in outstr.split("\n")) # Useful for a coverage check - see developer docs for how to run the check -if __name__ == '__main__': +if __name__ == "__main__": test_examples_receipt() for root, filename in examples_in_docs_data: test_example_in_doc(root, filename) diff --git a/flattentool/tests/test_end_to_end.py b/flattentool/tests/test_end_to_end.py index 28b4192b..26cd0768 100644 --- 
a/flattentool/tests/test_end_to_end.py +++ b/flattentool/tests/test_end_to_end.py @@ -1,4 +1,4 @@ -''' +""" The tests in these functions are the minimal cases necessary to give you a good understanding of the expected behaviour of flattentool. @@ -36,24 +36,27 @@ `jsonref`. TODO: Extra columns -''' +""" from __future__ import unicode_literals + from collections import OrderedDict from decimal import Decimal -import warnings -from jsonref import JsonRef import pytest +from jsonref import JsonRef -from flattentool.input import SpreadsheetInput, convert_type -from flattentool.tests.test_init import original_cell_and_row_locations, original_headings -from flattentool.schema import SchemaParser from flattentool.exceptions import DataErrorWarning +from flattentool.input import SpreadsheetInput +from flattentool.schema import SchemaParser +from flattentool.tests.test_init import ( + original_cell_and_row_locations, + original_headings, +) def test_type_conversion_no_schema(): - '''\ + """\ Without a schema flattentool keeps integers as they are, but makes everything else a string. @@ -61,27 +64,35 @@ def test_type_conversion_no_schema(): QUESTION: Is this behaviour predictable? Should everything be treated as a string perhaps? - ''' + """ sheets = [ { - 'name': 'main', - 'headings': ['int', 'string', 'decimal', 'float'], - 'rows': [ - [1, 'a', Decimal('1.2'), 1.3], - ['1', 'a', '1.2', '1.3'], - ['InvalidInt', 1, 'InvalidDecimal', 'InvalidFloat'], - ] + "name": "main", + "headings": ["int", "string", "decimal", "float"], + "rows": [ + [1, "a", Decimal("1.2"), 1.3], + ["1", "a", "1.2", "1.3"], + ["InvalidInt", 1, "InvalidDecimal", "InvalidFloat"], + ], } ] expected = [ - OrderedDict([('int', 1), ('string', 'a'), ('decimal', '1.2'), - ('float', '1.3')]), + OrderedDict( + [("int", 1), ("string", "a"), ("decimal", "1.2"), ("float", "1.3")] + ), # Note how int is 1 the first time, and '1' the second, with # everything else unchanged. - OrderedDict([('int', '1'), ('string', 'a'), ('decimal', '1.2'), - ('float', '1.3')]), - OrderedDict([('int', 'InvalidInt'), ('string', 1), - ('decimal', 'InvalidDecimal'), ('float', 'InvalidFloat')]) + OrderedDict( + [("int", "1"), ("string", "a"), ("decimal", "1.2"), ("float", "1.3")] + ), + OrderedDict( + [ + ("int", "InvalidInt"), + ("string", 1), + ("decimal", "InvalidDecimal"), + ("float", "InvalidFloat"), + ] + ), ] # TODO It would be nice to assert there are no warnings here, but py.test # doesn't seem to make this easy @@ -89,56 +100,65 @@ def test_type_conversion_no_schema(): def test_type_conversion_with_schema(): - ''' + """ With a schema flattentool converts input to the correct types. It returns int, float and decimal as `Decimal` instances though because the underlying schema treats them all just as 'number'. 
-    '''
+    """
     sheets = [
         {
-            'name': 'main',
-            'headings': ['int', 'string', 'decimal', 'float'],
-            'rows': [
-                [1, 'a', Decimal('1.2'), 1.3],
-                ['1', 'a', '1.2', '1.3'],
-                ['InvalidInt', 1, 'InvalidDecimal', 'InvalidFloat'],
-            ]
+            "name": "main",
+            "headings": ["int", "string", "decimal", "float"],
+            "rows": [
+                [1, "a", Decimal("1.2"), 1.3],
+                ["1", "a", "1.2", "1.3"],
+                ["InvalidInt", 1, "InvalidDecimal", "InvalidFloat"],
+            ],
         }
     ]
     schema = {
-        '$schema': 'http://json-schema.org/draft-04/schema#',
-        'type': 'object',
-        'properties': {
-            'int': {'type': 'number'},
-            'string': {'type': 'string'},
-            'decimal': {'type': 'number'},
-            'float': {'type': 'number'},
+        "$schema": "http://json-schema.org/draft-04/schema#",
+        "type": "object",
+        "properties": {
+            "int": {"type": "number"},
+            "string": {"type": "string"},
+            "decimal": {"type": "number"},
+            "float": {"type": "number"},
         },
     }
     expected = [
-        OrderedDict([
-            ('int', Decimal('1')),
-            ('string', 'a'),
-            ('decimal', Decimal('1.2')),
-            ('float', Decimal(
-                '1.3000000000000000444089209850062616169452667236328125'
-            ))
-        ]),
+        OrderedDict(
+            [
+                ("int", Decimal("1")),
+                ("string", "a"),
+                ("decimal", Decimal("1.2")),
+                (
+                    "float",
+                    Decimal("1.3000000000000000444089209850062616169452667236328125"),
+                ),
+            ]
+        ),
         # Notice how the decimal representation of the float isn't quite right
         # because of float errors. Probably better to put input in as strings
         # to avoid this issue.
-        OrderedDict([
-            ('int', Decimal('1')),
-            ('string', 'a'),
-            ('decimal', Decimal('1.2')),
-            ('float', Decimal('1.3'))]),
+        OrderedDict(
+            [
+                ("int", Decimal("1")),
+                ("string", "a"),
+                ("decimal", Decimal("1.2")),
+                ("float", Decimal("1.3")),
+            ]
+        ),
         # Notice how the schema validator allows through invalid data, but
         # converts things to strings if it can
-        OrderedDict([
-            ('int', 'InvalidInt'),
-            ('string', '1'),
-            ('decimal', 'InvalidDecimal'),
-            ('float', 'InvalidFloat')]),
+        OrderedDict(
+            [
+                ("int", "InvalidInt"),
+                ("string", "1"),
+                ("decimal", "InvalidDecimal"),
+                ("float", "InvalidFloat"),
+            ]
+        ),
     ]
     with pytest.warns(DataErrorWarning) as type_warnings:
         assert (expected, None, None) == run(sheets, schema)
@@ -155,64 +175,56 @@ def test_type_conversion_with_schema():

 @pytest.mark.xfail
 def test_merging_cols():
-    '''
+    """
     This test demonstrates two problems:

     * Single rows are returned as a row, not as a list of rows with length 1
     * Columns with the same name result in the first being overwritten and
       not appearing in the cell source map
-    '''
-    sheets = [
-        {
-            'name': 'main',
-            'headings': ['int', 'int'],
-            'rows': [
-                [1, 2],
-            ]
-        }
-    ]
+    """
+    sheets = [{"name": "main", "headings": ["int", "int"], "rows": [[1, 2],]}]
     # XXX We don't correctly get a list of lists here, just [OrderedDict([(u'int', 2)])]
-    expected_result = [
-        [OrderedDict([(u'int', 2)])]
-    ]
+    expected_result = [[OrderedDict([("int", 2)])]]
     # XXX Fails to keep the source map to cell B2 because the value is lost early on in
     # converting the row to a dictionary
-    expected_cell_source_map = OrderedDict([
-        (u'main/0/int', [('main', 'A', 2, 'int'), ('main', 'B', 2, 'int')]),
-        (u'main/0', [('main', 2)]),
-    ])
-    expected_heading_source_map = OrderedDict([
-        (u'main/int', [('main', 'int')]),
-    ])
+    expected_cell_source_map = OrderedDict(
+        [
+            ("main/0/int", [("main", "A", 2, "int"), ("main", "B", 2, "int")]),
+            ("main/0", [("main", 2)]),
+        ]
+    )
+    expected_heading_source_map = OrderedDict([("main/int", [("main", "int")]),])
     expected = (expected_result, expected_cell_source_map, expected_heading_source_map)
     assert expected == run(sheets, source_maps=True)


 test_dict_data_result = [
-    OrderedDict([('name', 'James'), ('address', OrderedDict([('house', '15')]))]),
+    OrderedDict([("name", "James"), ("address", OrderedDict([("house", "15")]))]),
 ]

 test_dict_data_sheets = [
     {
-        'name': 'main',
-        'headings': ['name', 'address/house'],
-        'rows': [
-            ['James', '15'],
-        ],
+        "name": "main",
+        "headings": ["name", "address/house"],
+        "rows": [["James", "15"],],
     },
 ]

-test_dict_data_cell_source_map = OrderedDict([
-    ('main/0/address/house', [('main', 'B', 2, 'address/house')]),
-    ('main/0/name', [('main', 'A', 2, 'name')]),
-    ('main/0/address', [('main', 2)]),
-    ('main/0', [('main', 2)]),
-])
+test_dict_data_cell_source_map = OrderedDict(
+    [
+        ("main/0/address/house", [("main", "B", 2, "address/house")]),
+        ("main/0/name", [("main", "A", 2, "name")]),
+        ("main/0/address", [("main", 2)]),
+        ("main/0", [("main", 2)]),
+    ]
+)

-test_dict_data_heading_source_map = OrderedDict([
-    ('main/address/house', [('main', 'address/house')]),
-    ('main/name', [('main', 'name')]),
-])
+test_dict_data_heading_source_map = OrderedDict(
+    [
+        ("main/address/house", [("main", "address/house")]),
+        ("main/name", [("main", "name")]),
+    ]
+)

 test_dict_data = [
     # No schema case
@@ -227,21 +239,17 @@ def test_merging_cols():
     (
         test_dict_data_sheets,
         {
-            '$schema': 'http://json-schema.org/draft-04/schema#',
-            'definitions': {
-                'Address': {
-                    'type': 'object',
-                    'properties': {
-                        'house': {'type': 'string'},
-                    },
+            "$schema": "http://json-schema.org/draft-04/schema#",
+            "definitions": {
+                "Address": {
+                    "type": "object",
+                    "properties": {"house": {"type": "string"},},
                 }
             },
-            'type': 'object',
-            'properties': {
-                'name': {'type': 'string'},
-                'address': {
-                    '$ref': '#/definitions/Address'
-                },
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "address": {"$ref": "#/definitions/Address"},
             },
         },
         test_dict_data_result,
@@ -252,23 +260,19 @@ def test_merging_cols():
     (
         test_dict_data_sheets,
         {
-            '$schema': 'http://json-schema.org/draft-04/schema#',
-            'title': 'Person Title',
-            'definitions': {
-                'Address': {
-                    'type': 'object',
-                    'title': 'Address Title',
-                    'properties': {
-                        'house': {'type': 'string'},
-                    },
+            "$schema": "http://json-schema.org/draft-04/schema#",
+            "title": "Person Title",
+            "definitions": {
+                "Address": {
+                    "type": "object",
+                    "title": "Address Title",
+                    "properties": {"house": {"type": "string"},},
                 }
             },
-            'type': 'object',
-            'properties': {
-                'name': {'type': 'string'},
-                'address': {
-                    '$ref': '#/definitions/Address'
-                }
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "address": {"$ref": "#/definitions/Address"},
             },
         },
         test_dict_data_result,
@@ -279,126 +283,119 @@ def test_merging_cols():
     (
         [
            {
-                'name': 'main',
-                'headings': [' NAmE TiTLe ', ' ADDresS TiTLe : HOusE TiTLe '],
-                'rows': [
-                    ['James', '15'],
-                ],
+                "name": "main",
+                "headings": [" NAmE TiTLe ", " ADDresS TiTLe : HOusE TiTLe "],
+                "rows": [["James", "15"],],
             },
         ],
         {
-            '$schema': 'http://json-schema.org/draft-04/schema#',
-            'title': 'Person Title',
-            'definitions': {
-                'Address': {
-                    'type': 'object',
-                    'title': 'Address Title',
-                    'properties': {
-                        'house': {
-                            'type': 'string',
-                            'title': 'House Title',
-                        },
+            "$schema": "http://json-schema.org/draft-04/schema#",
+            "title": "Person Title",
+            "definitions": {
+                "Address": {
+                    "type": "object",
+                    "title": "Address Title",
+                    "properties": {
+                        "house": {"type": "string", "title": "House Title",},
                     },
                 }
             },
-            'type': 'object',
-            'properties': {
-                'name': {
-                    'type': 'string',
-                    'title': 'Name Title',
-                },
-                'address': {
-                    '$ref': '#/definitions/Address'
-                },
+            "type": "object",
+            "properties": {
+                "name": {"type": "string", "title": "Name Title",},
+                "address": {"$ref": "#/definitions/Address"},
             },
         },
         [
-            OrderedDict([('name', 'James'), ('address', OrderedDict([('house', '15')]))]),
+            OrderedDict(
+                [("name", "James"), ("address", OrderedDict([("house", "15")]))]
+            ),
         ],
-        OrderedDict([
-            ('main/0/address/house', [('main', 'B', 2, ' ADDresS TiTLe : HOusE TiTLe ')]),
-            ('main/0/name', [('main', 'A', 2, ' NAmE TiTLe ')]),
-            ('main/0/address', [('main', 2)]),
-            ('main/0', [('main', 2)]),
-        ]),
-        OrderedDict([
-            ('main/address/house', [('main', ' ADDresS TiTLe : HOusE TiTLe ')]),
-            ('main/name', [('main', ' NAmE TiTLe ')]),
-        ]),
-    )
+        OrderedDict(
+            [
+                (
+                    "main/0/address/house",
+                    [("main", "B", 2, " ADDresS TiTLe : HOusE TiTLe ")],
+                ),
+                ("main/0/name", [("main", "A", 2, " NAmE TiTLe ")]),
+                ("main/0/address", [("main", 2)]),
+                ("main/0", [("main", 2)]),
+            ]
+        ),
+        OrderedDict(
+            [
+                ("main/address/house", [("main", " ADDresS TiTLe : HOusE TiTLe ")]),
+                ("main/name", [("main", " NAmE TiTLe ")]),
+            ]
+        ),
+    ),
 ]


 @pytest.mark.parametrize(
-    'sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map',
-    test_dict_data
+    "sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map",
+    test_dict_data,
 )
-def test_dict(sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map):
-    result, cell_source_map, heading_source_map = run(sheets, schema=schema, source_maps=True)
+def test_dict(
+    sheets,
+    schema,
+    expected_result,
+    expected_cell_source_map,
+    expected_heading_source_map,
+):
+    result, cell_source_map, heading_source_map = run(
+        sheets, schema=schema, source_maps=True
+    )
     assert expected_result == result
     assert expected_cell_source_map == cell_source_map
     assert expected_heading_source_map == heading_source_map


 test_list_of_dicts_data_result = [
-    OrderedDict([
-        ('name', 'James'),
-        ('address', [
-            OrderedDict([
-                ('house', '15'),
-            ])
-        ])])
+    OrderedDict([("name", "James"), ("address", [OrderedDict([("house", "15"),])])])
 ]

 test_list_of_dicts_data_sheets = [
     {
-        'name': 'main',
-        'headings': ['name', 'address/0/house'],
-        'rows': [
-            ['James', '15'],
-        ],
+        "name": "main",
+        "headings": ["name", "address/0/house"],
+        "rows": [["James", "15"],],
     },
 ]

-test_list_of_dicts_data_cell_source_map = OrderedDict([
-    (u'main/0/address/0/house', [('main', 'B', 2, 'address/0/house')]),
-    (u'main/0/name', [('main', 'A', 2, 'name')]),
-    (u'main/0/address/0', [('main', 2)]),
-    (u'main/0', [('main', 2)])
-])
+test_list_of_dicts_data_cell_source_map = OrderedDict(
+    [
+        ("main/0/address/0/house", [("main", "B", 2, "address/0/house")]),
+        ("main/0/name", [("main", "A", 2, "name")]),
+        ("main/0/address/0", [("main", 2)]),
+        ("main/0", [("main", 2)]),
+    ]
+)

-test_list_of_dicts_data_heading_source_map = OrderedDict([
-    ('main/address/house', [('main', 'address/0/house')]),
-    ('main/name', [('main', 'name')]),
-])
+test_list_of_dicts_data_heading_source_map = OrderedDict(
+    [
+        ("main/address/house", [("main", "address/0/house")]),
+        ("main/name", [("main", "name")]),
+    ]
+)

 test_list_of_dicts_data_schema_with_titles = {
-    '$schema': 'http://json-schema.org/draft-04/schema#',
-    'type': 'object',
-    'title': 'Person Title',
-    'definitions': {
-        'Address': {
-            'type': 'object',
-            'title': 'Address Item Title',
-            'properties': {
-                'house': {
-                    'type': 'string',
-                    'title': 'House Title',
-                },
-            },
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "object",
+    "title": "Person Title",
+    "definitions": {
+        "Address": {
+            "type": "object",
+            "title": "Address Item Title",
+            "properties": {"house": {"type": "string", "title": "House Title",},},
         },
     },
-    'properties': {
-        'name': {
-            'type': 'string',
-            'title': 'Name Title',
-        },
-        'address': {
-            'items': {
-                '$ref': '#/definitions/Address',
-            },
-            'type': 'array',
-            'title': 'Address Title',
+    "properties": {
+        "name": {"type": "string", "title": "Name Title",},
+        "address": {
+            "items": {"$ref": "#/definitions/Address",},
+            "type": "array",
+            "title": "Address Title",
         },
     },
 }
@@ -416,28 +413,20 @@ def test_dict(sheets, schema, expected_result, expected_cell_source_map, expecte
     (
         test_list_of_dicts_data_sheets,
         {
-            '$schema': 'http://json-schema.org/draft-04/schema#',
-            'type': 'object',
-            'definitions': {
-                'Address': {
-                    'type': 'object',
-                    'title': 'Address Item Title',
-                    'properties': {
-                        'house': {
-                            'type': 'string',
-                        },
-                    },
+            "$schema": "http://json-schema.org/draft-04/schema#",
+            "type": "object",
+            "definitions": {
+                "Address": {
+                    "type": "object",
+                    "title": "Address Item Title",
+                    "properties": {"house": {"type": "string",},},
                 },
             },
-            'properties': {
-                'name': {
-                    'type': 'string',
-                },
-                'address': {
-                    'items': {
-                        '$ref': '#/definitions/Address',
-                    },
-                    'type': 'array',
+            "properties": {
+                "name": {"type": "string",},
+                "address": {
+                    "items": {"$ref": "#/definitions/Address",},
+                    "type": "array",
                 },
             },
         },
@@ -457,113 +446,112 @@ def test_dict(sheets, schema, expected_result, expected_cell_source_map, expecte
     (
         [
             {
-                'name': 'main',
-                'headings': [' NAmE TiTLe ', ' ADDresS TiTLe : 0 : HOusE TiTLe '],
-                'rows': [
-                    ['James', '15'],
-                ],
+                "name": "main",
+                "headings": [" NAmE TiTLe ", " ADDresS TiTLe : 0 : HOusE TiTLe "],
+                "rows": [["James", "15"],],
             },
         ],
         test_list_of_dicts_data_schema_with_titles,
         test_list_of_dicts_data_result,
-        OrderedDict([
-            (u'main/0/address/0/house', [('main', 'B', 2, ' ADDresS TiTLe : 0 : HOusE TiTLe ')]),
-            (u'main/0/name', [('main', 'A', 2, ' NAmE TiTLe ')]),
-            (u'main/0/address/0', [('main', 2)]),
-            (u'main/0', [('main', 2)])
-        ]),
-        OrderedDict([
-            ('main/address/house', [('main', ' ADDresS TiTLe : 0 : HOusE TiTLe ')]),
-            ('main/name', [('main', ' NAmE TiTLe ')]),
-        ]),
-    )
+        OrderedDict(
+            [
+                (
+                    "main/0/address/0/house",
+                    [("main", "B", 2, " ADDresS TiTLe : 0 : HOusE TiTLe ")],
+                ),
+                ("main/0/name", [("main", "A", 2, " NAmE TiTLe ")]),
+                ("main/0/address/0", [("main", 2)]),
+                ("main/0", [("main", 2)]),
+            ]
+        ),
+        OrderedDict(
+            [
+                ("main/address/house", [("main", " ADDresS TiTLe : 0 : HOusE TiTLe ")]),
+                ("main/name", [("main", " NAmE TiTLe ")]),
+            ]
+        ),
+    ),
 ]


 @pytest.mark.parametrize(
-    'sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map',
-    test_list_of_dicts_data
+    "sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map",
+    test_list_of_dicts_data,
 )
-def test_list_of_dicts(sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map):
-    result, cell_source_map, heading_source_map = run(sheets, schema=schema, source_maps=True)
+def test_list_of_dicts(
+    sheets,
+    schema,
+    expected_result,
+    expected_cell_source_map,
+    expected_heading_source_map,
+):
+    result, cell_source_map, heading_source_map = run(
+        sheets, schema=schema, source_maps=True
+    )
     assert expected_result == result
     assert expected_cell_source_map == cell_source_map
     assert expected_heading_source_map == heading_source_map


 test_list_of_dicts_with_ids_data_result = [
-    OrderedDict([
-        ('id', 'person1'),
-        ('name', 'James'),
-        ('address', [
-            OrderedDict([
-                ('id', 'address1'),
-                ('house', '15'),
-            ])
-        ])])
+    OrderedDict(
+        [
+            ("id", "person1"),
+            ("name", "James"),
+            ("address", [OrderedDict([("id", "address1"), ("house", "15"),])]),
+        ]
+    )
 ]

 test_list_of_dicts_with_ids_data_sheets = [
     {
-        'name': 'main',
-        'headings': ['id', 'name', 'address/0/id', 'address/0/house'],
-        'rows': [
-            ['person1', 'James', 'address1', '15'],
-        ],
+        "name": "main",
+        "headings": ["id", "name", "address/0/id", "address/0/house"],
+        "rows": [["person1", "James", "address1", "15"],],
     },
 ]

-test_list_of_dicts_with_ids_data_cell_source_map = OrderedDict([
-    (u'main/0/address/0/house', [('main', 'D', 2, 'address/0/house')]),
-    (u'main/0/address/0/id', [('main', 'C', 2, 'address/0/id')]),
-    (u'main/0/id', [('main', 'A', 2, 'id')]),
-    (u'main/0/name', [('main', 'B', 2, 'name')]),
-    (u'main/0/address/0', [('main', 2)]),
-    (u'main/0', [('main', 2)])
-])
-
-test_list_of_dicts_with_ids_data_heading_source_map = OrderedDict([
-    ('main/address/house', [('main', 'address/0/house')]),
-    ('main/address/id', [('main', 'address/0/id')]),
-    ('main/id', [('main', 'id')]),
-    ('main/name', [('main', 'name')]),
-])
+test_list_of_dicts_with_ids_data_cell_source_map = OrderedDict(
+    [
+        ("main/0/address/0/house", [("main", "D", 2, "address/0/house")]),
+        ("main/0/address/0/id", [("main", "C", 2, "address/0/id")]),
+        ("main/0/id", [("main", "A", 2, "id")]),
+        ("main/0/name", [("main", "B", 2, "name")]),
+        ("main/0/address/0", [("main", 2)]),
+        ("main/0", [("main", 2)]),
+    ]
+)
+
+test_list_of_dicts_with_ids_data_heading_source_map = OrderedDict(
+    [
+        ("main/address/house", [("main", "address/0/house")]),
+        ("main/address/id", [("main", "address/0/id")]),
+        ("main/id", [("main", "id")]),
+        ("main/name", [("main", "name")]),
+    ]
+)

 test_list_of_dicts_with_ids_data_schema_with_titles = {
-    '$schema': 'http://json-schema.org/draft-04/schema#',
-    'type': 'object',
-    'title': 'Person Title',
-    'definitions': {
-        'Address': {
-            'type': 'object',
-            'title': 'Address Item Title',
-            'properties': {
-                'house': {
-                    'type': 'string',
-                    'title': 'House Title',
-                },
-                'id': {
-                    'type': 'string',
-                    'title': 'Identifier',
-                },
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "object",
+    "title": "Person Title",
+    "definitions": {
+        "Address": {
+            "type": "object",
+            "title": "Address Item Title",
+            "properties": {
+                "house": {"type": "string", "title": "House Title",},
+                "id": {"type": "string", "title": "Identifier",},
             },
         },
     },
-    'properties': {
-        'id': {
-            'type': 'string',
-            'title': 'Identifier',
-        },
-        'name': {
-            'type': 'string',
-            'title': 'Name Title',
-        },
-        'address': {
-            'items': {
-                '$ref': '#/definitions/Address',
-            },
-            'type': 'array',
-            'title': 'Address Title',
+    "properties": {
+        "id": {"type": "string", "title": "Identifier",},
+        "name": {"type": "string", "title": "Name Title",},
+        "address": {
+            "items": {"$ref": "#/definitions/Address",},
+            "type": "array",
+            "title": "Address Title",
         },
     },
 }
@@ -581,33 +569,23 @@ def test_list_of_dicts(sheets, schema, expected_result, expected_cell_source_map
     (
         test_list_of_dicts_with_ids_data_sheets,
         {
-            '$schema': 'http://json-schema.org/draft-04/schema#',
-            'type': 'object',
-            'definitions': {
-                'Address': {
-                    'type': 'object',
-                    'properties': {
-                        'house': {
-                            'type': 'string',
-                        },
-                        'id': {
-                            'type': 'string',
-                        },
+            "$schema": "http://json-schema.org/draft-04/schema#",
+            "type": "object",
+            "definitions": {
+                "Address": {
+                    "type": "object",
+                    "properties": {
+                        "house": {"type": "string",},
+                        "id": {"type": "string",},
                     },
                 },
             },
-            'properties': {
-                'id': {
-                    'type': 'string',
-                },
-                'name': {
-                    'type': 'string',
-                },
-                'address': {
-                    'items': {
-                        '$ref': '#/definitions/Address',
-                    },
-                    'type': 'array',
+            "properties": {
+                "id": {"type": "string",},
+                "name": {"type": "string",},
+                "address": {
+                    "items": {"$ref": "#/definitions/Address",},
+                    "type": "array",
                 },
             },
         },
@@ -627,44 +605,60 @@ def test_list_of_dicts(sheets, schema, expected_result, expected_cell_source_map
     (
         [
             {
-                'name': 'main',
-                'headings': [
-                    ' IDENtifiER ',
-                    ' NAmE TiTLe ',
-                    ' ADDresS TiTLe : 0 : IDENtifiER ',
-                    ' ADDresS TiTLe : 0 : HOusE TiTLe '
-                ],
-                'rows': [
-                    ['person1', 'James', 'address1', '15'],
+                "name": "main",
+                "headings": [
+                    " IDENtifiER ",
+                    " NAmE TiTLe ",
+                    " ADDresS TiTLe : 0 : IDENtifiER ",
+                    " ADDresS TiTLe : 0 : HOusE TiTLe ",
                 ],
+                "rows": [["person1", "James", "address1", "15"],],
             },
         ],
         test_list_of_dicts_with_ids_data_schema_with_titles,
         test_list_of_dicts_with_ids_data_result,
-        OrderedDict([
-            (u'main/0/address/0/house', [('main', 'D', 2, ' ADDresS TiTLe : 0 : HOusE TiTLe ')]),
-            (u'main/0/address/0/id', [('main', 'C', 2, ' ADDresS TiTLe : 0 : IDENtifiER ')]),
-            (u'main/0/id', [('main', 'A', 2, ' IDENtifiER ')]),
-            (u'main/0/name', [('main', 'B', 2, ' NAmE TiTLe ')]),
-            (u'main/0/address/0', [('main', 2)]),
-            (u'main/0', [('main', 2)])
-        ]),
-        OrderedDict([
-            ('main/address/house', [('main', ' ADDresS TiTLe : 0 : HOusE TiTLe ')]),
-            ('main/address/id', [('main', ' ADDresS TiTLe : 0 : IDENtifiER ')]),
-            ('main/id', [('main', ' IDENtifiER ')]),
-            ('main/name', [('main', ' NAmE TiTLe ')]),
-        ]),
-    )
+        OrderedDict(
+            [
+                (
+                    "main/0/address/0/house",
+                    [("main", "D", 2, " ADDresS TiTLe : 0 : HOusE TiTLe ")],
+                ),
+                (
+                    "main/0/address/0/id",
+                    [("main", "C", 2, " ADDresS TiTLe : 0 : IDENtifiER ")],
+                ),
+                ("main/0/id", [("main", "A", 2, " IDENtifiER ")]),
+                ("main/0/name", [("main", "B", 2, " NAmE TiTLe ")]),
+                ("main/0/address/0", [("main", 2)]),
+                ("main/0", [("main", 2)]),
+            ]
+        ),
+        OrderedDict(
+            [
+                ("main/address/house", [("main", " ADDresS TiTLe : 0 : HOusE TiTLe ")]),
+                ("main/address/id", [("main", " ADDresS TiTLe : 0 : IDENtifiER ")]),
+                ("main/id", [("main", " IDENtifiER ")]),
+                ("main/name", [("main", " NAmE TiTLe ")]),
+            ]
+        ),
+    ),
 ]


 @pytest.mark.parametrize(
-    'sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map',
-    test_list_of_dicts_with_ids_data
+    "sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map",
+    test_list_of_dicts_with_ids_data,
 )
-def test_list_of_dicts_with_ids(sheets, schema, expected_result, expected_cell_source_map, expected_heading_source_map):
-    result, cell_source_map, heading_source_map = run(sheets, schema=schema, source_maps=True)
+def test_list_of_dicts_with_ids(
+    sheets,
+    schema,
+    expected_result,
+    expected_cell_source_map,
+    expected_heading_source_map,
+):
+    result, cell_source_map, heading_source_map = run(
+        sheets, schema=schema, source_maps=True
+    )
     assert expected_result == result
     assert expected_cell_source_map == cell_source_map
     assert expected_heading_source_map == heading_source_map
@@ -675,290 +669,320 @@ def test_list_of_dicts_with_ids(sheets, schema, expected_result, expected_cell_s
     (
         [
             {
-                'name': 'main',
-                'headings': ['id', 'name'],
-                'rows': [
-                    ['PERSON-james', 'James'],
-                    ['PERSON-bob', 'Bob'],
-                ]
+                "name": "main",
+                "headings": ["id", "name"],
+                "rows": [["PERSON-james", "James"], ["PERSON-bob", "Bob"],],
             },
             {
-                'name': 'addresses',
-                'headings': ['id', 'address/0/house', 'address/0/town'],
-                'rows': [
-                    ['PERSON-james', '1', 'London'],
-                    ['PERSON-james', '2', 'Birmingham'],
-                    ['PERSON-bob', '3', 'Leeds'],
-                    ['PERSON-bob', '4', 'Manchester'],
-                ]
+                "name": "addresses",
+                "headings": ["id", "address/0/house", "address/0/town"],
+                "rows": [
+                    ["PERSON-james", "1", "London"],
+                    ["PERSON-james", "2", "Birmingham"],
+                    ["PERSON-bob", "3", "Leeds"],
+                    ["PERSON-bob", "4", "Manchester"],
+                ],
             },
         ],
-        OrderedDict([
-            # Cells
-            ('main/0/address/0/house', [('addresses', 'B', 2, 'address/0/house')]),
-            ('main/0/address/0/town', [('addresses', 'C', 2, 'address/0/town')]),
-            ('main/0/address/1/house', [('addresses', 'B', 3, 'address/0/house')]),
-            ('main/0/address/1/town', [('addresses', 'C', 3, 'address/0/town')]),
-            ('main/0/id', [('main', 'A', 2, 'id'), ('addresses', 'A', 2, 'id'), ('addresses', 'A', 3, 'id')]),
-            ('main/0/name', [('main', 'B', 2, 'name')]),
-            ('main/1/address/0/house', [('addresses', 'B', 4, 'address/0/house')]),
-            ('main/1/address/0/town', [('addresses', 'C', 4, 'address/0/town')]),
-            ('main/1/address/1/house', [('addresses', 'B', 5, 'address/0/house')]),
-            ('main/1/address/1/town', [('addresses', 'C', 5, 'address/0/town')]),
-            ('main/1/id', [('main', 'A', 3, 'id'), ('addresses', 'A', 4, 'id'), ('addresses', 'A', 5, 'id')]),
-            ('main/1/name', [('main', 'B', 3, 'name')]),
-            # Rows
-            ('main/0/address/0', [('addresses', 2)]),
-            ('main/0/address/1', [('addresses', 3)]),
-            ('main/0', [('main', 2), ('addresses', 2), ('addresses', 3)]),
-            ('main/1/address/0', [('addresses', 4)]),
-            ('main/1/address/1', [('addresses', 5)]),
-            ('main/1', [('main', 3), ('addresses', 4), ('addresses', 5)])
-        ]),
-        OrderedDict([
-            ('main/address/house', [('addresses', 'address/0/house')]),
-            ('main/address/town', [('addresses', 'address/0/town')]),
-            ('main/id', [('main', 'id'), ('addresses', 'id')]),
-            ('main/name', [('main', 'name')])
-        ]),
+        OrderedDict(
+            [
+                # Cells
+                ("main/0/address/0/house", [("addresses", "B", 2, "address/0/house")]),
+                ("main/0/address/0/town", [("addresses", "C", 2, "address/0/town")]),
+                ("main/0/address/1/house", [("addresses", "B", 3, "address/0/house")]),
+                ("main/0/address/1/town", [("addresses", "C", 3, "address/0/town")]),
+                (
+                    "main/0/id",
+                    [
+                        ("main", "A", 2, "id"),
+                        ("addresses", "A", 2, "id"),
+                        ("addresses", "A", 3, "id"),
+                    ],
+                ),
+                ("main/0/name", [("main", "B", 2, "name")]),
+                ("main/1/address/0/house", [("addresses", "B", 4, "address/0/house")]),
+                ("main/1/address/0/town", [("addresses", "C", 4, "address/0/town")]),
+                ("main/1/address/1/house", [("addresses", "B", 5, "address/0/house")]),
+                ("main/1/address/1/town", [("addresses", "C", 5, "address/0/town")]),
+                (
+                    "main/1/id",
+                    [
+                        ("main", "A", 3, "id"),
+                        ("addresses", "A", 4, "id"),
+                        ("addresses", "A", 5, "id"),
+                    ],
+                ),
+                ("main/1/name", [("main", "B", 3, "name")]),
+                # Rows
+                ("main/0/address/0", [("addresses", 2)]),
+                ("main/0/address/1", [("addresses", 3)]),
+                ("main/0", [("main", 2), ("addresses", 2), ("addresses", 3)]),
+                ("main/1/address/0", [("addresses", 4)]),
+                ("main/1/address/1", [("addresses", 5)]),
+                ("main/1", [("main", 3), ("addresses", 4), ("addresses", 5)]),
+            ]
+        ),
+        OrderedDict(
+            [
+                ("main/address/house", [("addresses", "address/0/house")]),
+                ("main/address/town", [("addresses", "address/0/town")]),
+                ("main/id", [("main", "id"), ("addresses", "id")]),
+                ("main/name", [("main", "name")]),
+            ]
+        ),
         (
             [
-                'addresses:A2',
-                'addresses:A3',
-                'addresses:A4',
-                'addresses:A5',
-                'addresses:B2',
-                'addresses:B3',
-                'addresses:B4',
-                'addresses:B5',
-                'addresses:C2',
-                'addresses:C3',
-                'addresses:C4',
-                'addresses:C5',
-                'main:A2',
-                'main:A3',
-                'main:B2',
-                'main:B3',
+                "addresses:A2",
+                "addresses:A3",
+                "addresses:A4",
+                "addresses:A5",
+                "addresses:B2",
+                "addresses:B3",
+                "addresses:B4",
+                "addresses:B5",
+                "addresses:C2",
+                "addresses:C3",
+                "addresses:C4",
+                "addresses:C5",
+                "main:A2",
+                "main:A3",
+                "main:B2",
+                "main:B3",
             ],
             {
-                'main:2': 1,
-                'main:3': 1,
-                'addresses:2': 2,
-                'addresses:3': 2,
-                'addresses:5': 2,
-                'addresses:4': 2,
-            }
+                "main:2": 1,
+                "main:3": 1,
+                "addresses:2": 2,
+                "addresses:3": 2,
+                "addresses:5": 2,
+                "addresses:4": 2,
+            },
         ),
         [
-            'addresses:address/0/house',
-            'addresses:address/0/town',
-            'addresses:id',
-            'main:id',
-            'main:name'
-        ]
+            "addresses:address/0/house",
+            "addresses:address/0/town",
+            "addresses:id",
+            "main:id",
+            "main:name",
+        ],
     ),
     (
         # New columns for each item of the array
        [
            {
-                'name': 'main',
-                'headings': ['id', 'name', 'address/0/house', 'address/0/town', 'address/1/house', 'address/1/town'],
-                'rows': [
-                    ['PERSON-james', 'James', '1', 'London', '2', 'Birmingham'],
-                    ['PERSON-bob', 'Bob', '3', 'Leeds', '4', 'Manchester'],
-                ]
+                "name": "main",
+                "headings": [
+                    "id",
+                    "name",
+                    "address/0/house",
+                    "address/0/town",
+                    "address/1/house",
+                    "address/1/town",
+                ],
+                "rows": [
+                    ["PERSON-james", "James", "1", "London", "2", "Birmingham"],
+                    ["PERSON-bob", "Bob", "3", "Leeds", "4", "Manchester"],
+                ],
             },
         ],
-        OrderedDict([
-            ('main/0/address/0/house', [('main', 'C', 2, 'address/0/house')]),
-            ('main/0/address/0/town', [('main', 'D', 2, 'address/0/town')]),
-            ('main/0/address/1/house', [('main', 'E', 2, 'address/1/house')]),
-            ('main/0/address/1/town', [('main', 'F', 2, 'address/1/town')]),
-            ('main/0/id', [('main', 'A', 2, 'id')]),
-            ('main/0/name', [('main', 'B', 2, 'name')]),
-            ('main/1/address/0/house', [('main', 'C', 3, 'address/0/house')]),
-            ('main/1/address/0/town', [('main', 'D', 3, 'address/0/town')]),
-            ('main/1/address/1/house', [('main', 'E', 3, 'address/1/house')]),
-            ('main/1/address/1/town', [('main', 'F', 3, 'address/1/town')]),
-            ('main/1/id', [('main', 'A', 3, 'id')]),
-            ('main/1/name', [('main', 'B', 3, 'name')]),
-            ('main/0/address/0', [('main', 2)]),
-            ('main/0/address/1', [('main', 2)]),
-            ('main/0', [('main', 2)]),
-            ('main/1/address/0', [('main', 3)]),
-            ('main/1/address/1', [('main', 3)]),
-            ('main/1', [('main', 3)])
-        ]),
-        OrderedDict([
-            # Note that you get two headings because there are two de-normalised versions
-            ('main/address/house', [('main', 'address/0/house'), ('main', 'address/1/house')]),
-            ('main/address/town', [('main', 'address/0/town'), ('main', 'address/1/town')]),
-            ('main/id', [('main', 'id')]),
-            ('main/name', [('main', 'name')])
-        ]),
+        OrderedDict(
+            [
+                ("main/0/address/0/house", [("main", "C", 2, "address/0/house")]),
+                ("main/0/address/0/town", [("main", "D", 2, "address/0/town")]),
+                ("main/0/address/1/house", [("main", "E", 2, "address/1/house")]),
+                ("main/0/address/1/town", [("main", "F", 2, "address/1/town")]),
+                ("main/0/id", [("main", "A", 2, "id")]),
+                ("main/0/name", [("main", "B", 2, "name")]),
+                ("main/1/address/0/house", [("main", "C", 3, "address/0/house")]),
+                ("main/1/address/0/town", [("main", "D", 3, "address/0/town")]),
+                ("main/1/address/1/house", [("main", "E", 3, "address/1/house")]),
+                ("main/1/address/1/town", [("main", "F", 3, "address/1/town")]),
+                ("main/1/id", [("main", "A", 3, "id")]),
+                ("main/1/name", [("main", "B", 3, "name")]),
+                ("main/0/address/0", [("main", 2)]),
+                ("main/0/address/1", [("main", 2)]),
+                ("main/0", [("main", 2)]),
+                ("main/1/address/0", [("main", 3)]),
+                ("main/1/address/1", [("main", 3)]),
+                ("main/1", [("main", 3)]),
+            ]
+        ),
+        OrderedDict(
+            [
+                # Note that you get two headings because there are two de-normalised versions
+                (
+                    "main/address/house",
+                    [("main", "address/0/house"), ("main", "address/1/house")],
+                ),
+                (
+                    "main/address/town",
+                    [("main", "address/0/town"), ("main", "address/1/town")],
+                ),
+                ("main/id", [("main", "id")]),
+                ("main/name", [("main", "name")]),
+            ]
+        ),
         (
             [
-                'main:A2',
-                'main:A3',
-                'main:B2',
-                'main:B3',
-                'main:C2',
-                'main:C3',
-                'main:D2',
-                'main:D3',
-                'main:E2',
-                'main:E3',
-                'main:F2',
-                'main:F3',
+                "main:A2",
+                "main:A3",
+                "main:B2",
+                "main:B3",
+                "main:C2",
+                "main:C3",
+                "main:D2",
+                "main:D3",
+                "main:E2",
+                "main:E3",
+                "main:F2",
+                "main:F3",
             ],
             {
                 # XXX Note that this is 3 since there are 3 unique dictionaries
-                'main:2': 3,
-                'main:3': 3,
-            }
+                "main:2": 3,
+                "main:3": 3,
+            },
        ),
         [
-            'main:address/0/house',
-            'main:address/0/town',
-            'main:address/1/house',
-            'main:address/1/town',
-            'main:id',
-            'main:name',
-        ]
+            "main:address/0/house",
+            "main:address/0/town",
+            "main:address/1/house",
+            "main:address/1/town",
+            "main:id",
+            "main:name",
+        ],
     ),
     (
         # Repeated rows
         [
             {
-                'name': 'main',
-                'headings': ['id', 'name', 'address/0/house', 'address/0/town'],
-                'rows': [
-                    ['PERSON-james', 'James', '1', 'London'],
-                    ['PERSON-james', 'James', '2', 'Birmingham'],
-                    ['PERSON-bob', 'Bob', '3', 'Leeds'],
-                    ['PERSON-bob', 'Bob', '4', 'Manchester'],
-                ]
+                "name": "main",
+                "headings": ["id", "name", "address/0/house", "address/0/town"],
+                "rows": [
+                    ["PERSON-james", "James", "1", "London"],
+                    ["PERSON-james", "James", "2", "Birmingham"],
+                    ["PERSON-bob", "Bob", "3", "Leeds"],
+                    ["PERSON-bob", "Bob", "4", "Manchester"],
+                ],
             },
         ],
-        OrderedDict([
-            ('main/0/address/0/house', [('main', 'C', 2, 'address/0/house')]),
-            ('main/0/address/0/town', [('main', 'D', 2, 'address/0/town')]),
-            ('main/0/address/1/house', [('main', 'C', 3, 'address/0/house')]),
-            ('main/0/address/1/town', [('main', 'D', 3, 'address/0/town')]),
-            ('main/0/id', [('main', 'A', 2, 'id'), ('main', 'A', 3, 'id')]),
-            ('main/0/name', [('main', 'B', 2, 'name'), ('main', 'B', 3, 'name')]),
-
-            ('main/1/address/0/house', [('main', 'C', 4, 'address/0/house')]),
-            ('main/1/address/0/town', [('main', 'D', 4, 'address/0/town')]),
-            ('main/1/address/1/house', [('main', 'C', 5, 'address/0/house')]),
-            ('main/1/address/1/town', [('main', 'D', 5, 'address/0/town')]),
-            ('main/1/id', [('main', 'A', 4, 'id'), ('main', 'A', 5, 'id')]),
-            ('main/1/name', [('main', 'B', 4, 'name'), ('main', 'B', 5, 'name')]),
-
-            ('main/0/address/0', [('main', 2)]),
-            ('main/0/address/1', [('main', 3)]),
-            ('main/0', [('main', 2), ('main', 3)]),
-
-            ('main/1/address/0', [('main', 4)]),
-            ('main/1/address/1', [('main', 5)]),
-            ('main/1', [('main', 4), ('main', 5)])
-        ]),
-        OrderedDict([
-            ('main/address/house', [('main', 'address/0/house')]),
-            ('main/address/town', [('main', 'address/0/town')]),
-            ('main/id', [('main', 'id')]),
-            ('main/name', [('main', 'name')])
-        ]),
+        OrderedDict(
+            [
+                ("main/0/address/0/house", [("main", "C", 2, "address/0/house")]),
+                ("main/0/address/0/town", [("main", "D", 2, "address/0/town")]),
+                ("main/0/address/1/house", [("main", "C", 3, "address/0/house")]),
+                ("main/0/address/1/town", [("main", "D", 3, "address/0/town")]),
+                ("main/0/id", [("main", "A", 2, "id"), ("main", "A", 3, "id")]),
+                ("main/0/name", [("main", "B", 2, "name"), ("main", "B", 3, "name")]),
+                ("main/1/address/0/house", [("main", "C", 4, "address/0/house")]),
+                ("main/1/address/0/town", [("main", "D", 4, "address/0/town")]),
+                ("main/1/address/1/house", [("main", "C", 5, "address/0/house")]),
+                ("main/1/address/1/town", [("main", "D", 5, "address/0/town")]),
+                ("main/1/id", [("main", "A", 4, "id"), ("main", "A", 5, "id")]),
+                ("main/1/name", [("main", "B", 4, "name"), ("main", "B", 5, "name")]),
+                ("main/0/address/0", [("main", 2)]),
+                ("main/0/address/1", [("main", 3)]),
+                ("main/0", [("main", 2), ("main", 3)]),
+                ("main/1/address/0", [("main", 4)]),
+                ("main/1/address/1", [("main", 5)]),
+                ("main/1", [("main", 4), ("main", 5)]),
+            ]
+        ),
+        OrderedDict(
+            [
+                ("main/address/house", [("main", "address/0/house")]),
+                ("main/address/town", [("main", "address/0/town")]),
+                ("main/id", [("main", "id")]),
+                ("main/name", [("main", "name")]),
+            ]
+        ),
         (
             [
-                'main:A2',
-                'main:A3',
-                'main:A4',
-                'main:A5',
-                'main:B2',
-                'main:B3',
-                'main:B4',
-                'main:B5',
-                'main:C2',
-                'main:C3',
-                'main:C4',
-                'main:C5',
-                'main:D2',
-                'main:D3',
-                'main:D4',
-                'main:D5',
+                "main:A2",
+                "main:A3",
+                "main:A4",
+                "main:A5",
+                "main:B2",
+                "main:B3",
+                "main:B4",
+                "main:B5",
+                "main:C2",
+                "main:C3",
+                "main:C4",
+                "main:C5",
+                "main:D2",
+                "main:D3",
+                "main:D4",
+                "main:D5",
             ],
-            {
-                'main:2': 2,
-                'main:3': 2,
-                'main:5': 2,
-                'main:4': 2
-            }
+            {"main:2": 2, "main:3": 2, "main:5": 2, "main:4": 2},
         ),
-        [
-            'main:address/0/house',
-            'main:address/0/town',
-            'main:id',
-            'main:name',
-        ],
-    )
+        ["main:address/0/house", "main:address/0/town", "main:id", "main:name",],
+    ),
 )


 @pytest.mark.parametrize(
     (
-        'sheets,'
-        'expected_cell_source_map,'
-        'expected_heading_source_map,'
-        'expected_original_cell_and_row_locations,'
-        'expected_original_heading_locations'
+        "sheets,"
+        "expected_cell_source_map,"
+        "expected_heading_source_map,"
+        "expected_original_cell_and_row_locations,"
+        "expected_original_heading_locations"
     ),
-    test_arrangement_data_sheets
+    test_arrangement_data_sheets,
 )
 def test_arrangement(
     sheets,
     expected_cell_source_map,
     expected_heading_source_map,
     expected_original_cell_and_row_locations,
-    expected_original_heading_locations
+    expected_original_heading_locations,
 ):
     expected_result = [
-        OrderedDict([
-            ('id', 'PERSON-james'),
-            ('name', 'James'),
-            ('address', [
-                OrderedDict([
-                    ('house', '1'),
-                    ('town', 'London'),
-                ]),
-                OrderedDict([
-                    ('house', '2'),
-                    ('town', 'Birmingham'),
-                ])
-            ]),
-        ]),
-        OrderedDict([
-            ('id', 'PERSON-bob'),
-            ('name', 'Bob'),
-            ('address', [
-                OrderedDict([
-                    ('house', '3'),
-                    ('town', 'Leeds'),
-                ]),
-                OrderedDict([
-                    ('house', '4'),
-                    ('town', 'Manchester'),
-                ])
-            ]),
-        ]),
+        OrderedDict(
+            [
+                ("id", "PERSON-james"),
+                ("name", "James"),
+                (
+                    "address",
+                    [
+                        OrderedDict([("house", "1"), ("town", "London"),]),
+                        OrderedDict([("house", "2"), ("town", "Birmingham"),]),
+                    ],
+                ),
+            ]
+        ),
+        OrderedDict(
+            [
+                ("id", "PERSON-bob"),
+                ("name", "Bob"),
+                (
+                    "address",
+                    [
+                        OrderedDict([("house", "3"), ("town", "Leeds"),]),
+                        OrderedDict([("house", "4"), ("town", "Manchester"),]),
+                    ],
+                ),
+            ]
+        ),
     ]
-    actual_result, actual_cell_source_map, actual_heading_source_map = run(sheets, source_maps=True)
-    actual_original_cell_and_row_locations = original_cell_and_row_locations(actual_cell_source_map or {})
-    actual_original_heading_locations = original_headings(actual_heading_source_map or {})
+    actual_result, actual_cell_source_map, actual_heading_source_map = run(
+        sheets, source_maps=True
+    )
+    actual_original_cell_and_row_locations = original_cell_and_row_locations(
+        actual_cell_source_map or {}
+    )
+    actual_original_heading_locations = original_headings(
+        actual_heading_source_map or {}
+    )
     assert expected_result == actual_result
     assert expected_cell_source_map == actual_cell_source_map
     assert expected_heading_source_map == actual_heading_source_map
-    assert expected_original_cell_and_row_locations == actual_original_cell_and_row_locations
+    assert (
+        expected_original_cell_and_row_locations
+        == actual_original_cell_and_row_locations
+    )
     assert expected_original_heading_locations == actual_original_heading_locations
@@ -983,36 +1007,36 @@ def run(sheets, schema=None, source_maps=False):
     input_sheets = OrderedDict()
     for sheet in sheets:
         rows = []
-        for row in sheet['rows']:
-            rows.append(OrderedDict(zip(sheet['headings'], row)))
-        input_sheets[sheet['name']] = rows
-        input_headings[sheet['name']] = sheet['headings']
+        for row in sheet["rows"]:
+            rows.append(OrderedDict(zip(sheet["headings"], row)))
+        input_sheets[sheet["name"]] = rows
+        input_headings[sheet["name"]] = sheet["headings"]
     if schema is not None:
         spreadsheet_input = HeadingListInput(
             input_sheets,
             input_headings,
-            root_id='',  # QUESTION: I don't understand root_id
-            convert_titles=True,  # Without this, the titles aren't understood
+            root_id="",  # QUESTION: I don't understand root_id
+            convert_titles=True,  # Without this, the titles aren't understood
         )
         # Without this, the $ref entries in the schema aren't resolved.
         dereferenced_schema = JsonRef.replace_refs(schema)
         # raise Exception(dereferenced_schema)
         parser = SchemaParser(
-            root_schema_dict=dereferenced_schema,
-            root_id='main',
-            rollup=True
+            root_schema_dict=dereferenced_schema, root_id="main", rollup=True
         )
         parser.parse()
         spreadsheet_input.parser = parser
     else:
-        spreadsheet_input = HeadingListInput(
-            input_sheets,
-            input_headings,
-            root_id='',
-        )
+        spreadsheet_input = HeadingListInput(input_sheets, input_headings, root_id="",)
     spreadsheet_input.read_sheets()
     if source_maps:
-        result, cell_source_map_data, heading_source_map_data = spreadsheet_input.fancy_unflatten(with_cell_source_map=True, with_heading_source_map=True)
+        (
+            result,
+            cell_source_map_data,
+            heading_source_map_data,
+        ) = spreadsheet_input.fancy_unflatten(
+            with_cell_source_map=True, with_heading_source_map=True
+        )
         return result, cell_source_map_data, heading_source_map_data
     else:
         return spreadsheet_input.unflatten(), None, None
diff --git a/flattentool/tests/test_headings.py b/flattentool/tests/test_headings.py
index 3ac657b7..668036f0 100644
--- a/flattentool/tests/test_headings.py
+++ b/flattentool/tests/test_headings.py
@@ -1,25 +1,24 @@
 from collections import OrderedDict
-from flattentool.input import SpreadsheetInput
-from flattentool.schema import SchemaParser
-from flattentool.exceptions import DataErrorWarning
-from jsonref import JsonRef
+
 import pytest
+from jsonref import JsonRef

+from flattentool.exceptions import DataErrorWarning
+from flattentool.input import SpreadsheetInput
+from flattentool.schema import SchemaParser

 test_heading_warning_data = [
     (
-        ['a', 'a'],
+        ["a", "a"],
         [
             # Check we use the later values
             [1, 2],
         ],
-        [
-            'Duplicate heading "a" found, ignoring the data in column A.'
- ], - ([OrderedDict([('a', 2)])], None, None), + ['Duplicate heading "a" found, ignoring the data in column A.'], + ([OrderedDict([("a", 2)])], None, None), ), ( - ['a', 'b', 'c', 'b', 'c', 'c', 'd', 'd', 'd', 'd'], + ["a", "b", "c", "b", "c", "c", "d", "d", "d", "d"], [ # Check for warnings even with empty cells [1,], @@ -29,23 +28,18 @@ 'Duplicate heading "c" found, ignoring the data in columns C and E.', 'Duplicate heading "d" found, ignoring the data in columns G, H and I.', ], - ([OrderedDict([('a', 1)])], None, None), + ([OrderedDict([("a", 1)])], None, None), ), ] @pytest.mark.parametrize( - 'headings, rows, expected_warnings, expected_result', - test_heading_warning_data + "headings, rows, expected_warnings, expected_result", test_heading_warning_data ) -def test_duplicate_headings_give_warning(headings, rows, expected_warnings, expected_result): - sheets = [ - { - 'name': 'main', - 'headings': headings, - 'rows': rows, - } - ] +def test_duplicate_headings_give_warning( + headings, rows, expected_warnings, expected_result +): + sheets = [{"name": "main", "headings": headings, "rows": rows,}] with pytest.warns(DataErrorWarning) as type_warnings: result = run(sheets) # Check that only one warning was raised @@ -77,36 +71,38 @@ def run(sheets, schema=None, source_maps=False): input_sheets = OrderedDict() for sheet in sheets: rows = [] - for row in sheet['rows']: - rows.append(OrderedDict(zip(sheet['headings'], row))) - input_sheets[sheet['name']] = rows - input_headings[sheet['name']] = sheet['headings'] + for row in sheet["rows"]: + rows.append(OrderedDict(zip(sheet["headings"], row))) + input_sheets[sheet["name"]] = rows + input_headings[sheet["name"]] = sheet["headings"] if schema is not None: spreadsheet_input = HeadingListInput( input_sheets, input_headings, - root_id='', + root_id="", # Without this, titles from a schema aren't understood convert_titles=True, ) # Without this, the $ref entries in the schema aren't resolved. 
dereferenced_schema = JsonRef.replace_refs(schema) parser = SchemaParser( - root_schema_dict=dereferenced_schema, - root_id='main', - rollup=True + root_schema_dict=dereferenced_schema, root_id="main", rollup=True ) parser.parse() spreadsheet_input.parser = parser else: - spreadsheet_input = HeadingListInput( - input_sheets, - input_headings, - root_id='', - ) + spreadsheet_input = HeadingListInput(input_sheets, input_headings, root_id="",) spreadsheet_input.read_sheets() if source_maps: - result, cell_source_map_data, heading_source_map_data = spreadsheet_input.fancy_unflatten(True, True) + ( + result, + cell_source_map_data, + heading_source_map_data, + ) = spreadsheet_input.fancy_unflatten(True, True) else: - result, cell_source_map_data, heading_source_map_data = spreadsheet_input.fancy_unflatten(False, False) + ( + result, + cell_source_map_data, + heading_source_map_data, + ) = spreadsheet_input.fancy_unflatten(False, False) return result, cell_source_map_data, heading_source_map_data diff --git a/flattentool/tests/test_init.py b/flattentool/tests/test_init.py index e884575a..e0f1cc21 100644 --- a/flattentool/tests/test_init.py +++ b/flattentool/tests/test_init.py @@ -1,18 +1,20 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from flattentool import decimal_default, unflatten -from decimal import Decimal + import json -import sys +from decimal import Decimal + import pytest +from flattentool import decimal_default, unflatten + def original_cell_and_row_locations(data): - ''' + """ Cells should each appear only once Rows should appear the number of times a column in it resolves to a unique dictionary - ''' + """ cells = [] rows = {} for key in data: @@ -20,14 +22,14 @@ def original_cell_and_row_locations(data): for cell in cell_list: if len(cell) == 2: # This is a row - row_str = '{}:{}'.format(cell[0], cell[1]) + row_str = "{}:{}".format(cell[0], cell[1]) if row_str not in rows: rows[row_str] = 1 else: rows[row_str] += 1 else: # This is a cell - cell_str = '{}:{}{}'.format(cell[0], cell[1], cell[2]) + cell_str = "{}:{}{}".format(cell[0], cell[1], cell[2]) assert cell_str not in cells cells.append(cell_str) cells.sort() @@ -35,14 +37,14 @@ def original_cell_and_row_locations(data): def original_headings(heading_data): - '''\ - ''' + """\ + """ headings = [] for key in heading_data: cell_list = heading_data[key] for cell in cell_list: assert len(cell) == 2 - heading_str = '{}:{}'.format(cell[0], cell[1]) + heading_str = "{}:{}".format(cell[0], cell[1]) assert heading_str not in headings headings.append(heading_str) headings.sort() @@ -50,13 +52,13 @@ def original_headings(heading_data): def test_decimal_default(): - assert json.dumps(Decimal('1.2'), default=decimal_default) == '1.2' - assert json.dumps(Decimal('42'), default=decimal_default) == '42' + assert json.dumps(Decimal("1.2"), default=decimal_default) == "1.2" + assert json.dumps(Decimal("42"), default=decimal_default) == "42" def lines_strip_whitespace(text): - lines = text.split('\n') - return '\n'.join(line.strip() for line in lines) + lines = text.split("\n") + return "\n".join(line.strip() for line in lines) def test_unflatten(tmpdir): @@ -66,39 +68,39 @@ def test_unflatten(tmpdir): Notable things we are checking for: Ordering is preseved - both the order of columns and rows """ - input_dir = tmpdir.ensure('release_input', dir=True) - input_dir.join('main.csv').write( - 'ocid,id,testA,test/id,test/C\n' - '1,2,3,4,5\n' - '1,2a,3a,4a,5a\n' - '6,7,8,9,10\n' - '6,7a,8a,9a,10a\n' + input_dir = 
tmpdir.ensure("release_input", dir=True) + input_dir.join("main.csv").write( + "ocid,id,testA,test/id,test/C\n" + "1,2,3,4,5\n" + "1,2a,3a,4a,5a\n" + "6,7,8,9,10\n" + "6,7a,8a,9a,10a\n" ) - input_dir.join('subsheet.csv').write( - 'ocid,id,sub/0/id,sub/0/testD,sub/0/test2/E,sub/0/test2/F\n' - '1,2,S1,11,12,13\n' - '1,2a,S1,14,15,16\n' - '1,2,S2,17,18,19\n' - '6,7,S1,20,21,22\n' + input_dir.join("subsheet.csv").write( + "ocid,id,sub/0/id,sub/0/testD,sub/0/test2/E,sub/0/test2/F\n" + "1,2,S1,11,12,13\n" + "1,2a,S1,14,15,16\n" + "1,2,S2,17,18,19\n" + "6,7,S1,20,21,22\n" ) - input_dir.join('subsheet_test.csv').write( - 'ocid,id,test/id,test/subsheet/0/id,test/subsheet/0/testD,test/subsheet/0/test2/E,test/subsheet/0/test2/F\n' - '1,2,4,S3,24,25,26\n' + input_dir.join("subsheet_test.csv").write( + "ocid,id,test/id,test/subsheet/0/id,test/subsheet/0/testD,test/subsheet/0/test2/E,test/subsheet/0/test2/F\n" + "1,2,4,S3,24,25,26\n" ) - input_dir.join('subsubsheet.csv').write( - 'ocid,id,sub/0/id,sub/0/subsub/0/testG\n' - '1,2,S1,23\n' + input_dir.join("subsubsheet.csv").write( + "ocid,id,sub/0/id,sub/0/subsub/0/testG\n" "1,2,S1,23\n" ) unflatten( input_dir.strpath, - input_format='csv', - output_name=tmpdir.join('release.json').strpath, - main_sheet_name='main', - cell_source_map=tmpdir.join('cell_source_map.json').strpath, - heading_source_map=tmpdir.join('heading_source_map.json').strpath) + input_format="csv", + output_name=tmpdir.join("release.json").strpath, + main_sheet_name="main", + cell_source_map=tmpdir.join("cell_source_map.json").strpath, + heading_source_map=tmpdir.join("heading_source_map.json").strpath, + ) # Note, "main/0/testA": comes after "main/0/test" because 'testA' > 'testA' # Note also that all the row entries come after the cell ones - expected = '''{ + expected = """{ "main/0/id": [ [ "main", @@ -657,83 +659,85 @@ def test_unflatten(tmpdir): 5 ] ] - }''' - assert lines_strip_whitespace(tmpdir.join('cell_source_map.json').read()) == lines_strip_whitespace(expected) + }""" + assert lines_strip_whitespace( + tmpdir.join("cell_source_map.json").read() + ) == lines_strip_whitespace(expected) data = json.loads(expected) - cells, rows = original_cell_and_row_locations(data) + cells, rows = original_cell_and_row_locations(data) # Make sure every cell in the original appeared in the cell source map exactly once assert cells == [ - 'main:A2', - 'main:A3', - 'main:A4', - 'main:A5', - 'main:B2', - 'main:B3', - 'main:B4', - 'main:B5', - 'main:C2', - 'main:C3', - 'main:C4', - 'main:C5', - 'main:D2', - 'main:D3', - 'main:D4', - 'main:D5', - 'main:E2', - 'main:E3', - 'main:E4', - 'main:E5', - 'subsheet:A2', - 'subsheet:A3', - 'subsheet:A4', - 'subsheet:A5', - 'subsheet:B2', - 'subsheet:B3', - 'subsheet:B4', - 'subsheet:B5', - 'subsheet:C2', - 'subsheet:C3', - 'subsheet:C4', - 'subsheet:C5', - 'subsheet:D2', - 'subsheet:D3', - 'subsheet:D4', - 'subsheet:D5', - 'subsheet:E2', - 'subsheet:E3', - 'subsheet:E4', - 'subsheet:E5', - 'subsheet:F2', - 'subsheet:F3', - 'subsheet:F4', - 'subsheet:F5', - 'subsheet_test:A2', - 'subsheet_test:B2', - 'subsheet_test:C2', - 'subsheet_test:D2', - 'subsheet_test:E2', - 'subsheet_test:F2', - 'subsheet_test:G2', - 'subsubsheet:A2', - 'subsubsheet:B2', - 'subsubsheet:C2', - 'subsubsheet:D2' + "main:A2", + "main:A3", + "main:A4", + "main:A5", + "main:B2", + "main:B3", + "main:B4", + "main:B5", + "main:C2", + "main:C3", + "main:C4", + "main:C5", + "main:D2", + "main:D3", + "main:D4", + "main:D5", + "main:E2", + "main:E3", + "main:E4", + "main:E5", + "subsheet:A2", + 
"subsheet:A3", + "subsheet:A4", + "subsheet:A5", + "subsheet:B2", + "subsheet:B3", + "subsheet:B4", + "subsheet:B5", + "subsheet:C2", + "subsheet:C3", + "subsheet:C4", + "subsheet:C5", + "subsheet:D2", + "subsheet:D3", + "subsheet:D4", + "subsheet:D5", + "subsheet:E2", + "subsheet:E3", + "subsheet:E4", + "subsheet:E5", + "subsheet:F2", + "subsheet:F3", + "subsheet:F4", + "subsheet:F5", + "subsheet_test:A2", + "subsheet_test:B2", + "subsheet_test:C2", + "subsheet_test:D2", + "subsheet_test:E2", + "subsheet_test:F2", + "subsheet_test:G2", + "subsubsheet:A2", + "subsubsheet:B2", + "subsubsheet:C2", + "subsubsheet:D2", ] # Make sure every row in the original appeared the number of times a column in it resolves to a unique dictionary assert rows == { - 'main:2': 2, - 'main:3': 2, - 'main:4': 2, - 'main:5': 2, - 'subsheet:2': 3, - 'subsheet:3': 3, - 'subsheet:4': 3, - 'subsheet:5': 3, - 'subsheet_test:2': 4, - 'subsubsheet:2': 3, + "main:2": 2, + "main:3": 2, + "main:4": 2, + "main:5": 2, + "subsheet:2": 3, + "subsheet:3": 3, + "subsheet:4": 3, + "subsheet:5": 3, + "subsheet_test:2": 4, + "subsubsheet:2": 3, } # TODO Check column names with a JSON schema - expected_headings = '''{ + expected_headings = """{ "main/id": [ [ "main", @@ -850,39 +854,41 @@ def test_unflatten(tmpdir): "testA" ] ] - }''' - assert lines_strip_whitespace(tmpdir.join('heading_source_map.json').read()) == lines_strip_whitespace(expected_headings) + }""" + assert lines_strip_whitespace( + tmpdir.join("heading_source_map.json").read() + ) == lines_strip_whitespace(expected_headings) heading_data = json.loads(expected_headings) headings = original_headings(heading_data) # Make sure every heading in the original appeared in the heading source map exactly once assert headings == [ - 'main:id', - 'main:ocid', - 'main:test/C', - 'main:test/id', - 'main:testA', - - 'subsheet:id', - 'subsheet:ocid', - 'subsheet:sub/0/id', - 'subsheet:sub/0/test2/E', - 'subsheet:sub/0/test2/F', - 'subsheet:sub/0/testD', - - 'subsheet_test:id', - 'subsheet_test:ocid', - 'subsheet_test:test/id', - 'subsheet_test:test/subsheet/0/id', - 'subsheet_test:test/subsheet/0/test2/E', - 'subsheet_test:test/subsheet/0/test2/F', - 'subsheet_test:test/subsheet/0/testD', - - 'subsubsheet:id', - 'subsubsheet:ocid', - 'subsubsheet:sub/0/id', - 'subsubsheet:sub/0/subsub/0/testG', + "main:id", + "main:ocid", + "main:test/C", + "main:test/id", + "main:testA", + "subsheet:id", + "subsheet:ocid", + "subsheet:sub/0/id", + "subsheet:sub/0/test2/E", + "subsheet:sub/0/test2/F", + "subsheet:sub/0/testD", + "subsheet_test:id", + "subsheet_test:ocid", + "subsheet_test:test/id", + "subsheet_test:test/subsheet/0/id", + "subsheet_test:test/subsheet/0/test2/E", + "subsheet_test:test/subsheet/0/test2/F", + "subsheet_test:test/subsheet/0/testD", + "subsubsheet:id", + "subsubsheet:ocid", + "subsubsheet:sub/0/id", + "subsubsheet:sub/0/subsub/0/testG", ] - assert lines_strip_whitespace(tmpdir.join('release.json').read()) == lines_strip_whitespace('''{ + assert lines_strip_whitespace( + tmpdir.join("release.json").read() + ) == lines_strip_whitespace( + """{ "main": [ { "ocid": "1", @@ -974,383 +980,470 @@ def test_unflatten(tmpdir): } } ] -}''') +}""" + ) def test_unflatten_empty(tmpdir): - input_dir = tmpdir.ensure('release_input', dir=True) - input_dir.join('main.csv').write_text( - 'ocid,id\n,\n,\n,', - encoding='utf8' - ) + input_dir = tmpdir.ensure("release_input", dir=True) + input_dir.join("main.csv").write_text("ocid,id\n,\n,\n,", encoding="utf8") unflatten( input_dir.strpath, - 
input_format='csv', - output_name=tmpdir.join('release.json').strpath, - main_sheet_name='main') - assert lines_strip_whitespace(tmpdir.join('release.json').read()) == lines_strip_whitespace('''{ + input_format="csv", + output_name=tmpdir.join("release.json").strpath, + main_sheet_name="main", + ) + assert lines_strip_whitespace( + tmpdir.join("release.json").read() + ) == lines_strip_whitespace( + """{ "main": [] - }''') + }""" + ) def test_unflatten_csv_utf8(tmpdir): - input_dir = tmpdir.ensure('release_input', dir=True) - input_dir.join('main.csv').write_text( - 'ocid,id\n1,éαГ😼𝒞人\n', - encoding='utf8' - ) + input_dir = tmpdir.ensure("release_input", dir=True) + input_dir.join("main.csv").write_text("ocid,id\n1,éαГ😼𝒞人\n", encoding="utf8") unflatten( input_dir.strpath, - input_format='csv', + input_format="csv", # Should default to utf8 - output_name=tmpdir.join('release.json').strpath, - main_sheet_name='main') - reloaded_json = json.load(tmpdir.join('release.json')) - assert reloaded_json == {'main': [{'ocid': '1', 'id': 'éαГ😼𝒞人'}]} + output_name=tmpdir.join("release.json").strpath, + main_sheet_name="main", + ) + reloaded_json = json.load(tmpdir.join("release.json")) + assert reloaded_json == {"main": [{"ocid": "1", "id": "éαГ😼𝒞人"}]} # The JSON we output should be UTF-8, rather than escaped ASCII # https://github.com/OpenDataServices/flatten-tool/issues/71 - assert 'éαГ😼𝒞人' in tmpdir.join('release.json').read_text(encoding='utf-8') + assert "éαГ😼𝒞人" in tmpdir.join("release.json").read_text(encoding="utf-8") def test_unflatten_csv_latin1(tmpdir): - input_dir = tmpdir.ensure('release_input', dir=True) - input_dir.join('main.csv').write_text( - 'ocid,id\n1,é\n', - encoding='latin1' - ) + input_dir = tmpdir.ensure("release_input", dir=True) + input_dir.join("main.csv").write_text("ocid,id\n1,é\n", encoding="latin1") unflatten( input_dir.strpath, - input_format='csv', - encoding='latin1', - output_name=tmpdir.join('release.json').strpath, - main_sheet_name='main') - reloaded_json = json.load(tmpdir.join('release.json')) - assert reloaded_json == {'main': [{'ocid': '1', 'id': 'é'}]} + input_format="csv", + encoding="latin1", + output_name=tmpdir.join("release.json").strpath, + main_sheet_name="main", + ) + reloaded_json = json.load(tmpdir.join("release.json")) + assert reloaded_json == {"main": [{"ocid": "1", "id": "é"}]} -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_unflatten_unicode(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/unicode.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/unicode.{}".format(input_format, input_format), input_format=input_format, - output_name=tmpdir.join('release.json').strpath, - main_sheet_name='main') - reloaded_json = json.load(tmpdir.join('release.json')) - assert reloaded_json == {'main': [{'ocid': 1 , 'id': 'éαГ😼𝒞人'}]} + output_name=tmpdir.join("release.json").strpath, + main_sheet_name="main", + ) + reloaded_json = json.load(tmpdir.join("release.json")) + assert reloaded_json == {"main": [{"ocid": 1, "id": "éαГ😼𝒞人"}]} -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_metatab(tmpdir, input_format): - tmpdir.join('metatab_schema.json').write( - '{"properties": {}}' - ) + tmpdir.join("metatab_schema.json").write('{"properties": {}}') unflatten( - 'flattentool/tests/fixtures/{}/basic_meta.{}'.format(input_format, input_format), + 
"flattentool/tests/fixtures/{}/basic_meta.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('meta_unflattened.json').strpath, - metatab_name='Meta', + output_name=tmpdir.join("meta_unflattened.json").strpath, + metatab_name="Meta", metatab_vertical_orientation=True, - metatab_schema = tmpdir.join('metatab_schema.json').strpath, - cell_source_map=tmpdir.join('meta_cell_source_map.json').strpath, - heading_source_map=tmpdir.join('meta_heading_source_map.json').strpath, - ) + metatab_schema=tmpdir.join("metatab_schema.json").strpath, + cell_source_map=tmpdir.join("meta_cell_source_map.json").strpath, + heading_source_map=tmpdir.join("meta_heading_source_map.json").strpath, + ) + + metatab_json = json.load(tmpdir.join("meta_unflattened.json")) + + assert metatab_json == { + "a": "a1", + "b": "b1", + "c": "c1", + "main": [ + {"colA": "cell1", "colB": "cell2"}, + {"colA": "cell3", "colB": "cell4"}, + {"colC": "cell5", "colD": "cell6"}, + {"colC": "cell7", "colD": "cell8"}, + ], + } + + cell_source_map = json.load(tmpdir.join("meta_cell_source_map.json")) + + assert cell_source_map == { + "": [["Meta", 2]], + "a": [["Meta", "1", 2, "a"]], + "b": [["Meta", "2", 2, "b"]], + "c": [["Meta", "3", 2, "c"]], + "main/0": [["main", 2]], + "main/0/colA": [["main", "A", 2, "colA"]], + "main/0/colB": [["main", "B", 2, "colB"]], + "main/1": [["main", 3]], + "main/1/colA": [["main", "A", 3, "colA"]], + "main/1/colB": [["main", "B", 3, "colB"]], + "main/2": [["subsheet", 2]], + "main/2/colC": [["subsheet", "A", 2, "colC"]], + "main/2/colD": [["subsheet", "B", 2, "colD"]], + "main/3": [["subsheet", 3]], + "main/3/colC": [["subsheet", "A", 3, "colC"]], + "main/3/colD": [["subsheet", "B", 3, "colD"]], + } + + heading_source_map = json.load(tmpdir.join("meta_heading_source_map.json")) + + assert heading_source_map == { + "a": [["Meta", "a"]], + "b": [["Meta", "b"]], + "c": [["Meta", "c"]], + "main/colA": [["main", "colA"]], + "main/colB": [["main", "colB"]], + "main/colC": [["subsheet", "colC"]], + "main/colD": [["subsheet", "colD"]], + } + - metatab_json = json.load(tmpdir.join('meta_unflattened.json')) - - assert metatab_json == {'a': 'a1', - 'b': 'b1', - 'c': 'c1', - 'main': [{'colA': 'cell1', 'colB': 'cell2'}, - {'colA': 'cell3', 'colB': 'cell4'}, - {'colC': 'cell5', 'colD': 'cell6'}, - {'colC': 'cell7', 'colD': 'cell8'}]} - - - cell_source_map = json.load(tmpdir.join('meta_cell_source_map.json')) - - assert cell_source_map == {'': [['Meta', 2]], - 'a': [['Meta', '1', 2, 'a']], - 'b': [['Meta', '2', 2, 'b']], - 'c': [['Meta', '3', 2, 'c']], - 'main/0': [['main', 2]], - 'main/0/colA': [['main', 'A', 2, 'colA']], - 'main/0/colB': [['main', 'B', 2, 'colB']], - 'main/1': [['main', 3]], - 'main/1/colA': [['main', 'A', 3, 'colA']], - 'main/1/colB': [['main', 'B', 3, 'colB']], - 'main/2': [['subsheet', 2]], - 'main/2/colC': [['subsheet', 'A', 2, 'colC']], - 'main/2/colD': [['subsheet', 'B', 2, 'colD']], - 'main/3': [['subsheet', 3]], - 'main/3/colC': [['subsheet', 'A', 3, 'colC']], - 'main/3/colD': [['subsheet', 'B', 3, 'colD']]} - - heading_source_map = json.load(tmpdir.join('meta_heading_source_map.json')) - - assert heading_source_map == {'a': [['Meta', 'a']], - 'b': [['Meta', 'b']], - 'c': [['Meta', 'c']], - 'main/colA': [['main', 'colA']], - 'main/colB': [['main', 'colB']], - 'main/colC': [['subsheet', 'colC']], - 'main/colD': [['subsheet', 'colD']]} - - -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", 
"ods"]) def test_metatab_only(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/basic_meta.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/basic_meta.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('meta_unflattened.json').strpath, - metatab_name='Meta', + output_name=tmpdir.join("meta_unflattened.json").strpath, + metatab_name="Meta", metatab_vertical_orientation=True, metatab_only=True, - cell_source_map=tmpdir.join('meta_cell_source_map.json').strpath, - heading_source_map=tmpdir.join('meta_heading_source_map.json').strpath, - ) - - metatab_json = json.load(tmpdir.join('meta_unflattened.json')) + cell_source_map=tmpdir.join("meta_cell_source_map.json").strpath, + heading_source_map=tmpdir.join("meta_heading_source_map.json").strpath, + ) - assert metatab_json == {'a': 'a1', - 'b': 'b1', - 'c': 'c1'} + metatab_json = json.load(tmpdir.join("meta_unflattened.json")) + assert metatab_json == {"a": "a1", "b": "b1", "c": "c1"} - cell_source_map = json.load(tmpdir.join('meta_cell_source_map.json')) + cell_source_map = json.load(tmpdir.join("meta_cell_source_map.json")) - assert cell_source_map == {'': [['Meta', 2]], - 'a': [['Meta', '1', 2, 'a']], - 'b': [['Meta', '2', 2, 'b']], - 'c': [['Meta', '3', 2, 'c']]} + assert cell_source_map == { + "": [["Meta", 2]], + "a": [["Meta", "1", 2, "a"]], + "b": [["Meta", "2", 2, "b"]], + "c": [["Meta", "3", 2, "c"]], + } - heading_source_map = json.load(tmpdir.join('meta_heading_source_map.json')) + heading_source_map = json.load(tmpdir.join("meta_heading_source_map.json")) - assert heading_source_map == {'a': [['Meta', 'a']], - 'b': [['Meta', 'b']], - 'c': [['Meta', 'c']]} + assert heading_source_map == { + "a": [["Meta", "a"]], + "b": [["Meta", "b"]], + "c": [["Meta", "c"]], + } -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_metatab_with_base(tmpdir, input_format): - tmpdir.join('base_json.json').write( - '{}' - ) + tmpdir.join("base_json.json").write("{}") with pytest.raises(Exception): unflatten( - 'flattentool/tests/fixtures/{}/basic_meta.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/basic_meta.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('meta_unflattened.json').strpath, - metatab_name='Meta', + output_name=tmpdir.join("meta_unflattened.json").strpath, + metatab_name="Meta", metatab_vertical_orientation=True, - base_json = tmpdir.join('base_json.json').strpath, - ) + base_json=tmpdir.join("base_json.json").strpath, + ) -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_bad_format(tmpdir, input_format): with pytest.raises(Exception): unflatten( - 'flattentool/tests/fixtures/{}/basic_meta.{}'.format(input_format, input_format), - input_format='what', - output_name=tmpdir.join('meta_unflattened.json').strpath, - ) + "flattentool/tests/fixtures/{}/basic_meta.{}".format( + input_format, input_format + ), + input_format="what", + output_name=tmpdir.join("meta_unflattened.json").strpath, + ) with pytest.raises(Exception): unflatten( - 'flattentool/tests/fixtures/{}/basic_meta.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/basic_meta.{}".format( + input_format, input_format + ), input_format=None, - output_name=tmpdir.join('meta_unflattened.json').strpath, - ) + 
output_name=tmpdir.join("meta_unflattened.json").strpath, + ) -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_commands_single_sheet_spreadsheet(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/commands_in_file.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_in_file.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('command_single_unflattened.json').strpath, - cell_source_map=tmpdir.join('command_single_source_map.json').strpath, - heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath, - ) + output_name=tmpdir.join("command_single_unflattened.json").strpath, + cell_source_map=tmpdir.join("command_single_source_map.json").strpath, + heading_source_map=tmpdir.join( + "command_single_heading_source_map.json" + ).strpath, + ) + + unflattened = json.load(tmpdir.join("command_single_unflattened.json")) - unflattened = json.load(tmpdir.join('command_single_unflattened.json')) + assert unflattened == { + "main": [{"actual": "actual", "headings": "data", "some": "some"}] + } - assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]} def test_commands_single_sheet_csv(tmpdir): unflatten( - 'flattentool/tests/fixtures/csv/commands_in_file', - input_format='csv', - output_name=tmpdir.join('command_single_unflattened.json').strpath, - cell_source_map=tmpdir.join('command_single_source_map.json').strpath, - heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath, - ) - unflattened = json.load(tmpdir.join('command_single_unflattened.json')) - assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]} + "flattentool/tests/fixtures/csv/commands_in_file", + input_format="csv", + output_name=tmpdir.join("command_single_unflattened.json").strpath, + cell_source_map=tmpdir.join("command_single_source_map.json").strpath, + heading_source_map=tmpdir.join( + "command_single_heading_source_map.json" + ).strpath, + ) + unflattened = json.load(tmpdir.join("command_single_unflattened.json")) + assert unflattened == { + "main": [{"actual": "actual", "headings": "data", "some": "some"}] + } -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_commands_metatab(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/commands_in_metatab.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_in_metatab.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('command_metatab_unflattened.json').strpath, - cell_source_map=tmpdir.join('command_metatab_source_map.json').strpath, - heading_source_map=tmpdir.join('command_metatab_heading_source_map.json').strpath, - metatab_name='Meta', - metatab_vertical_orientation=True - ) + output_name=tmpdir.join("command_metatab_unflattened.json").strpath, + cell_source_map=tmpdir.join("command_metatab_source_map.json").strpath, + heading_source_map=tmpdir.join( + "command_metatab_heading_source_map.json" + ).strpath, + metatab_name="Meta", + metatab_vertical_orientation=True, + ) - unflattened = json.load(tmpdir.join('command_metatab_unflattened.json')) + unflattened = json.load(tmpdir.join("command_metatab_unflattened.json")) - assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}, {'actual': 'actual', 
'headings': 'Other data', 'some': 'some'}], - 'some': 'data', 'anumber': 2} + assert unflattened == { + "main": [ + {"actual": "actual", "headings": "data", "some": "some"}, + {"actual": "actual", "headings": "Other data", "some": "some"}, + ], + "some": "data", + "anumber": 2, + } -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_commands_single_sheet_default(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/commands_defaulted.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_defaulted.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('command_single_unflattened.json').strpath, - cell_source_map=tmpdir.join('command_single_source_map.json').strpath, - heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath, + output_name=tmpdir.join("command_single_unflattened.json").strpath, + cell_source_map=tmpdir.join("command_single_source_map.json").strpath, + heading_source_map=tmpdir.join( + "command_single_heading_source_map.json" + ).strpath, default_configuration="SkipRows 1, headerrows 2", - ) - - unflattened = json.load(tmpdir.join('command_single_unflattened.json')) + ) - assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]} + unflattened = json.load(tmpdir.join("command_single_unflattened.json")) + assert unflattened == { + "main": [{"actual": "actual", "headings": "data", "some": "some"}] + } unflatten( - 'flattentool/tests/fixtures/{}/commands_defaulted.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_defaulted.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('command_single_unflattened.json').strpath, - cell_source_map=tmpdir.join('command_single_source_map.json').strpath, - heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath, + output_name=tmpdir.join("command_single_unflattened.json").strpath, + cell_source_map=tmpdir.join("command_single_source_map.json").strpath, + heading_source_map=tmpdir.join( + "command_single_heading_source_map.json" + ).strpath, default_configuration="SkipRows 1", - ) + ) - unflattened = json.load(tmpdir.join('command_single_unflattened.json')) + unflattened = json.load(tmpdir.join("command_single_unflattened.json")) - assert unflattened == {'main': [{'actual': 'other', 'headings': 'headings', 'some': 'some'}, {'actual': 'actual', 'headings': 'data', 'some': 'some'}]} + assert unflattened == { + "main": [ + {"actual": "other", "headings": "headings", "some": "some"}, + {"actual": "actual", "headings": "data", "some": "some"}, + ] + } -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_commands_default_override(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/commands_in_metatab_defaulted.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_in_metatab_defaulted.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('command_metatab_unflattened.json').strpath, - cell_source_map=tmpdir.join('command_metatab_source_map.json').strpath, - heading_source_map=tmpdir.join('command_metatab_heading_source_map.json').strpath, - metatab_name='Meta', + output_name=tmpdir.join("command_metatab_unflattened.json").strpath, + 
cell_source_map=tmpdir.join("command_metatab_source_map.json").strpath, + heading_source_map=tmpdir.join( + "command_metatab_heading_source_map.json" + ).strpath, + metatab_name="Meta", metatab_vertical_orientation=True, default_configuration="headerrows 2", - ) + ) - unflattened = json.load(tmpdir.join('command_metatab_unflattened.json')) + unflattened = json.load(tmpdir.join("command_metatab_unflattened.json")) # In this case want both 'headerrows 2' and 'skiprows 1' (which is defined in the metatab) to be used, # as we only override individual commands not all of them, # So the results in this case will be the same as if using commands_in_metatab.xlsx (where all commands are in metatab). - assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}, {'actual': 'actual', 'headings': 'Other data', 'some': 'some'}], - 'some': 'data'} + assert unflattened == { + "main": [ + {"actual": "actual", "headings": "data", "some": "some"}, + {"actual": "actual", "headings": "Other data", "some": "some"}, + ], + "some": "data", + } -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_commands_ignore(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/commands_ignore.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_ignore.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('command_single_unflattened.json').strpath, - cell_source_map=tmpdir.join('command_single_source_map.json').strpath, - heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath, - ) + output_name=tmpdir.join("command_single_unflattened.json").strpath, + cell_source_map=tmpdir.join("command_single_source_map.json").strpath, + heading_source_map=tmpdir.join( + "command_single_heading_source_map.json" + ).strpath, + ) - unflattened = json.load(tmpdir.join('command_single_unflattened.json')) + unflattened = json.load(tmpdir.join("command_single_unflattened.json")) - assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]} + assert unflattened == { + "main": [{"actual": "actual", "headings": "data", "some": "some"}] + } -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_commands_hashcomments(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/commands_hashcomments.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_hashcomments.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('commands_hashcomments_unflattened.json').strpath, - cell_source_map=tmpdir.join('commands_hashcomments_source_map.json').strpath, - heading_source_map=tmpdir.join('commands_hashcomments_heading_source_map.json').strpath, - metatab_name='Meta', - metatab_vertical_orientation=True - ) + output_name=tmpdir.join("commands_hashcomments_unflattened.json").strpath, + cell_source_map=tmpdir.join("commands_hashcomments_source_map.json").strpath, + heading_source_map=tmpdir.join( + "commands_hashcomments_heading_source_map.json" + ).strpath, + metatab_name="Meta", + metatab_vertical_orientation=True, + ) - unflattened = json.load(tmpdir.join('commands_hashcomments_unflattened.json')) + unflattened = json.load(tmpdir.join("commands_hashcomments_unflattened.json")) - assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}, 
{'actual': 'actual', 'headings': 'Other data', 'some': 'some'}], - 'some': 'data'} + assert unflattened == { + "main": [ + {"actual": "actual", "headings": "data", "some": "some"}, + {"actual": "actual", "headings": "Other data", "some": "some"}, + ], + "some": "data", + } -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_commands_hashcomments_sourcemap(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/commands_hashcomments_sourcemap.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_hashcomments_sourcemap.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('commands_hashcomments_unflattened.json').strpath, - cell_source_map=tmpdir.join('commands_hashcomments_source_map.json').strpath, - heading_source_map=tmpdir.join('commands_hashcomments_heading_source_map.json').strpath, - metatab_name='Meta', - metatab_vertical_orientation=True - ) + output_name=tmpdir.join("commands_hashcomments_unflattened.json").strpath, + cell_source_map=tmpdir.join("commands_hashcomments_source_map.json").strpath, + heading_source_map=tmpdir.join( + "commands_hashcomments_heading_source_map.json" + ).strpath, + metatab_name="Meta", + metatab_vertical_orientation=True, + ) - unflattened = json.load(tmpdir.join('commands_hashcomments_unflattened.json')) - cell_source_map = json.load(tmpdir.join('commands_hashcomments_source_map.json')) + unflattened = json.load(tmpdir.join("commands_hashcomments_unflattened.json")) + cell_source_map = json.load(tmpdir.join("commands_hashcomments_source_map.json")) - assert unflattened == {'publishedDate': '2019-06-20T00:00:00Z', - 'publisher': { - 'name': 'Open Data Services Co-operative' - }, - 'uri': 'http://www.example.com', - 'version': '1.1', - 'main': [{'date': '2010-03-15T09:30:00Z', 'id': 'Ocds-1'}] - } + assert unflattened == { + "publishedDate": "2019-06-20T00:00:00Z", + "publisher": {"name": "Open Data Services Co-operative"}, + "uri": "http://www.example.com", + "version": "1.1", + "main": [{"date": "2010-03-15T09:30:00Z", "id": "Ocds-1"}], + } # check fields have correct column letters - assert cell_source_map['main/0/date'][0][1] == 'E' - assert cell_source_map['main/0/id'][0][1] == 'C' + assert cell_source_map["main/0/date"][0][1] == "E" + assert cell_source_map["main/0/id"][0][1] == "C" -@pytest.mark.parametrize('input_format', ['xlsx', 'ods']) +@pytest.mark.parametrize("input_format", ["xlsx", "ods"]) def test_commands_id_name(tmpdir, input_format): unflatten( - 'flattentool/tests/fixtures/{}/commands_id_name.{}'.format(input_format, input_format), + "flattentool/tests/fixtures/{}/commands_id_name.{}".format( + input_format, input_format + ), input_format=input_format, - output_name=tmpdir.join('commands_id_name_unflattened.json').strpath, - cell_source_map=tmpdir.join('commands_id_name_source_map.json').strpath, - heading_source_map=tmpdir.join('commands_id_name_heading_source_map.json').strpath, - metatab_name='Meta', - metatab_vertical_orientation=True - ) - - unflattened = json.load(tmpdir.join('commands_id_name_unflattened.json')) + output_name=tmpdir.join("commands_id_name_unflattened.json").strpath, + cell_source_map=tmpdir.join("commands_id_name_source_map.json").strpath, + heading_source_map=tmpdir.join( + "commands_id_name_heading_source_map.json" + ).strpath, + metatab_name="Meta", + metatab_vertical_orientation=True, + ) - assert unflattened == {'someroot': [{'actual': 'actual', 
'headings': 'data', 'someId': 'some', - "someArray": [ - {"heading1": "more data", "heading2": "other data"}, - {"heading1": "more more data", "heading2": "more other data"}, - ] - }], - 'some': 'data'} + unflattened = json.load(tmpdir.join("commands_id_name_unflattened.json")) + + assert unflattened == { + "someroot": [ + { + "actual": "actual", + "headings": "data", + "someId": "some", + "someArray": [ + {"heading1": "more data", "heading2": "other data"}, + {"heading1": "more more data", "heading2": "more other data"}, + ], + } + ], + "some": "data", + } diff --git a/flattentool/tests/test_input.py b/flattentool/tests/test_input.py index c6c4f571..7bfc0d09 100644 --- a/flattentool/tests/test_input.py +++ b/flattentool/tests/test_input.py @@ -4,36 +4,41 @@ Tests of SpreadsheetInput class and its children are in test_input_SpreadsheetInput*.py """ from __future__ import unicode_literals + from flattentool.input import path_search -from decimal import Decimal -from collections import OrderedDict -import sys -import pytest -import openpyxl -import datetime def test_path_search(): goal_dict = {} assert goal_dict is not {} # following tests rely on this assert path_search(goal_dict, []) is goal_dict - assert path_search( - {'testA': goal_dict}, - ['testA']) is goal_dict - assert path_search( - {'a1': {'b1': {'c1': goal_dict}}}, - ['a1', 'b1', 'c1']) is goal_dict - assert path_search( - {'a1': {'b1': {'c1': goal_dict}}}, - ['a1', 'b1[]'], - id_fields={'a1/b1[]/id': 'c1'}) is goal_dict - assert path_search( - {'a1': {'b1': {'c1': goal_dict}}}, - ['a1[]', 'c1'], - id_fields={'a1[]/id': 'b1'}) is goal_dict + assert path_search({"testA": goal_dict}, ["testA"]) is goal_dict + assert ( + path_search({"a1": {"b1": {"c1": goal_dict}}}, ["a1", "b1", "c1"]) is goal_dict + ) + assert ( + path_search( + {"a1": {"b1": {"c1": goal_dict}}}, + ["a1", "b1[]"], + id_fields={"a1/b1[]/id": "c1"}, + ) + is goal_dict + ) + assert ( + path_search( + {"a1": {"b1": {"c1": goal_dict}}}, + ["a1[]", "c1"], + id_fields={"a1[]/id": "b1"}, + ) + is goal_dict + ) # Top is always assumed to be an array - assert path_search( - {'a1': {'b1': {'c1': goal_dict}}}, - ['a1', 'c1'], - id_fields={'a1/id': 'b1'}, - top=True) is goal_dict + assert ( + path_search( + {"a1": {"b1": {"c1": goal_dict}}}, + ["a1", "c1"], + id_fields={"a1/id": "b1"}, + top=True, + ) + is goal_dict + ) diff --git a/flattentool/tests/test_input_SpreadsheetInput.py b/flattentool/tests/test_input_SpreadsheetInput.py index dd775407..22484939 100644 --- a/flattentool/tests/test_input_SpreadsheetInput.py +++ b/flattentool/tests/test_input_SpreadsheetInput.py @@ -4,15 +4,24 @@ Tests of unflatten method are in test_input_SpreadsheetInput_unflatten.py """ from __future__ import unicode_literals -from flattentool.input import SpreadsheetInput, CSVInput, XLSXInput, ODSInput, convert_type -from decimal import Decimal -from collections import OrderedDict + +import datetime import sys +from collections import OrderedDict +from decimal import Decimal + import pytest -import openpyxl -import datetime import pytz +from flattentool.input import ( + CSVInput, + ODSInput, + SpreadsheetInput, + XLSXInput, + convert_type, +) + + class ListInput(SpreadsheetInput): def __init__(self, sheets, **kwargs): self.sheets = sheets @@ -24,191 +33,255 @@ def get_sheet_lines(self, sheet_name): def read_sheets(self): self.sub_sheet_names = list(self.sheets.keys()) + def test_spreadsheetinput_base_fails(): spreadsheet_input = SpreadsheetInput() with pytest.raises(NotImplementedError): 
spreadsheet_input.read_sheets() with pytest.raises(NotImplementedError): - spreadsheet_input.get_sheet_lines('test') + spreadsheet_input.get_sheet_lines("test") class TestSuccessfulInput(object): def test_csv_input(self, tmpdir): - main = tmpdir.join('main.csv') - main.write('colA,colB\ncell1,cell2\ncell3,cell4') - subsheet = tmpdir.join('subsheet.csv') - subsheet.write('colC,colD\ncell5,cell6\ncell7,cell8') + main = tmpdir.join("main.csv") + main.write("colA,colB\ncell1,cell2\ncell3,cell4") + subsheet = tmpdir.join("subsheet.csv") + subsheet.write("colC,colD\ncell5,cell6\ncell7,cell8") csvinput = CSVInput(input_name=tmpdir.strpath) csvinput.read_sheets() - assert csvinput.sub_sheet_names == ['main', 'subsheet'] - assert list(csvinput.get_sheet_lines('main')) == \ - [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}] - assert list(csvinput.get_sheet_lines('subsheet')) == \ - [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}] + assert csvinput.sub_sheet_names == ["main", "subsheet"] + assert list(csvinput.get_sheet_lines("main")) == [ + {"colA": "cell1", "colB": "cell2"}, + {"colA": "cell3", "colB": "cell4"}, + ] + assert list(csvinput.get_sheet_lines("subsheet")) == [ + {"colC": "cell5", "colD": "cell6"}, + {"colC": "cell7", "colD": "cell8"}, + ] def test_xlsx_input(self): - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic.xlsx') + xlsxinput = XLSXInput(input_name="flattentool/tests/fixtures/xlsx/basic.xlsx") xlsxinput.read_sheets() - assert xlsxinput.sub_sheet_names == ['main', 'subsheet'] - assert list(xlsxinput.get_sheet_lines('main')) == \ - [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}] - assert list(xlsxinput.get_sheet_lines('subsheet')) == \ - [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}] + assert xlsxinput.sub_sheet_names == ["main", "subsheet"] + assert list(xlsxinput.get_sheet_lines("main")) == [ + {"colA": "cell1", "colB": "cell2"}, + {"colA": "cell3", "colB": "cell4"}, + ] + assert list(xlsxinput.get_sheet_lines("subsheet")) == [ + {"colC": "cell5", "colD": "cell6"}, + {"colC": "cell7", "colD": "cell8"}, + ] def test_ods_input(self): - odsinput = ODSInput(input_name='flattentool/tests/fixtures/ods/basic.ods') + odsinput = ODSInput(input_name="flattentool/tests/fixtures/ods/basic.ods") odsinput.read_sheets() - assert list(odsinput.sub_sheet_names) == ['main', 'subsheet'] - assert list(odsinput.get_sheet_lines('main')) == \ - [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}] - assert list(odsinput.get_sheet_lines('subsheet')) == \ - [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}] + assert list(odsinput.sub_sheet_names) == ["main", "subsheet"] + assert list(odsinput.get_sheet_lines("main")) == [ + {"colA": "cell1", "colB": "cell2"}, + {"colA": "cell3", "colB": "cell4"}, + ] + assert list(odsinput.get_sheet_lines("subsheet")) == [ + {"colC": "cell5", "colD": "cell6"}, + {"colC": "cell7", "colD": "cell8"}, + ] def test_xlsx_vertical(self): - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic_transpose.xlsx', vertical_orientation=True) + xlsxinput = XLSXInput( + input_name="flattentool/tests/fixtures/xlsx/basic_transpose.xlsx", + vertical_orientation=True, + ) xlsxinput.read_sheets() - assert xlsxinput.sub_sheet_names == ['main', 'subsheet'] - assert list(xlsxinput.get_sheet_lines('main')) == \ - [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}] - assert 
list(xlsxinput.get_sheet_lines('subsheet')) == \ - [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}] + assert xlsxinput.sub_sheet_names == ["main", "subsheet"] + assert list(xlsxinput.get_sheet_lines("main")) == [ + {"colA": "cell1", "colB": "cell2"}, + {"colA": "cell3", "colB": "cell4"}, + ] + assert list(xlsxinput.get_sheet_lines("subsheet")) == [ + {"colC": "cell5", "colD": "cell6"}, + {"colC": "cell7", "colD": "cell8"}, + ] def test_ods_vertical(self): - odsinput = ODSInput(input_name='flattentool/tests/fixtures/ods/basic_transpose.ods', vertical_orientation=True) + odsinput = ODSInput( + input_name="flattentool/tests/fixtures/ods/basic_transpose.ods", + vertical_orientation=True, + ) odsinput.read_sheets() - assert list(odsinput.sub_sheet_names) == ['main', 'subsheet'] - assert list(odsinput.get_sheet_lines('main')) == \ - [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}] - assert list(odsinput.get_sheet_lines('subsheet')) == \ - [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}] + assert list(odsinput.sub_sheet_names) == ["main", "subsheet"] + assert list(odsinput.get_sheet_lines("main")) == [ + {"colA": "cell1", "colB": "cell2"}, + {"colA": "cell3", "colB": "cell4"}, + ] + assert list(odsinput.get_sheet_lines("subsheet")) == [ + {"colC": "cell5", "colD": "cell6"}, + {"colC": "cell7", "colD": "cell8"}, + ] def test_xlsx_include_ignore(self): - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic_meta.xlsx', - include_sheets=['Meta'], vertical_orientation=True - ) + xlsxinput = XLSXInput( + input_name="flattentool/tests/fixtures/xlsx/basic_meta.xlsx", + include_sheets=["Meta"], + vertical_orientation=True, + ) xlsxinput.read_sheets() - assert xlsxinput.sub_sheet_names == ['Meta'] - assert list(xlsxinput.get_sheet_lines('Meta')) == \ - [{'a': 'a1', 'b': 'b1', 'c': 'c1'}] - - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/basic_meta.xlsx', - exclude_sheets=['Meta']) + assert xlsxinput.sub_sheet_names == ["Meta"] + assert list(xlsxinput.get_sheet_lines("Meta")) == [ + {"a": "a1", "b": "b1", "c": "c1"} + ] + + xlsxinput = XLSXInput( + input_name="flattentool/tests/fixtures/xlsx/basic_meta.xlsx", + exclude_sheets=["Meta"], + ) xlsxinput.read_sheets() - assert xlsxinput.sub_sheet_names == ['main', 'subsheet'] - assert list(xlsxinput.get_sheet_lines('main')) == \ - [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}] - assert list(xlsxinput.get_sheet_lines('subsheet')) == \ - [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}] + assert xlsxinput.sub_sheet_names == ["main", "subsheet"] + assert list(xlsxinput.get_sheet_lines("main")) == [ + {"colA": "cell1", "colB": "cell2"}, + {"colA": "cell3", "colB": "cell4"}, + ] + assert list(xlsxinput.get_sheet_lines("subsheet")) == [ + {"colC": "cell5", "colD": "cell6"}, + {"colC": "cell7", "colD": "cell8"}, + ] def test_ods_include_ignore(self): - odsinput = ODSInput(input_name='flattentool/tests/fixtures/ods/basic_meta.ods', - include_sheets=['Meta'], vertical_orientation=True - ) + odsinput = ODSInput( + input_name="flattentool/tests/fixtures/ods/basic_meta.ods", + include_sheets=["Meta"], + vertical_orientation=True, + ) odsinput.read_sheets() - assert list(odsinput.sub_sheet_names) == ['Meta'] - assert list(odsinput.get_sheet_lines('Meta')) == \ - [{'a': 'a1', 'b': 'b1', 'c': 'c1'}] - - odsinput = ODSInput(input_name='flattentool/tests/fixtures/ods/basic_meta.ods', - exclude_sheets=['Meta']) + assert 
list(odsinput.sub_sheet_names) == ["Meta"] + assert list(odsinput.get_sheet_lines("Meta")) == [ + {"a": "a1", "b": "b1", "c": "c1"} + ] + + odsinput = ODSInput( + input_name="flattentool/tests/fixtures/ods/basic_meta.ods", + exclude_sheets=["Meta"], + ) odsinput.read_sheets() - assert list(odsinput.sub_sheet_names) == ['main', 'subsheet'] - assert list(odsinput.get_sheet_lines('main')) == \ - [{'colA': 'cell1', 'colB': 'cell2'}, {'colA': 'cell3', 'colB': 'cell4'}] - assert list(odsinput.get_sheet_lines('subsheet')) == \ - [{'colC': 'cell5', 'colD': 'cell6'}, {'colC': 'cell7', 'colD': 'cell8'}] + assert list(odsinput.sub_sheet_names) == ["main", "subsheet"] + assert list(odsinput.get_sheet_lines("main")) == [ + {"colA": "cell1", "colB": "cell2"}, + {"colA": "cell3", "colB": "cell4"}, + ] + assert list(odsinput.get_sheet_lines("subsheet")) == [ + {"colC": "cell5", "colD": "cell6"}, + {"colC": "cell7", "colD": "cell8"}, + ] def test_xlsx_input_types(self): - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/types.xlsx') + xlsxinput = XLSXInput(input_name="flattentool/tests/fixtures/xlsx/types.xlsx") xlsxinput.read_sheets() - assert list(xlsxinput.get_sheet_lines('main')) == \ - [{ - 'colInt': 1, - 'colFloat': 1.2, - 'colDate': datetime.datetime(2020, 3, 5), - 'colDateTime': datetime.datetime(2020, 2, 7, 16, 41, 0, 1), - }] - assert type(list(xlsxinput.get_sheet_lines('main'))[0]['colInt']) == int - assert type(list(xlsxinput.get_sheet_lines('main'))[0]['colFloat']) == float - assert xlsxinput.sub_sheet_names == ['main'] - + assert list(xlsxinput.get_sheet_lines("main")) == [ + { + "colInt": 1, + "colFloat": 1.2, + "colDate": datetime.datetime(2020, 3, 5), + "colDateTime": datetime.datetime(2020, 2, 7, 16, 41, 0, 1), + } + ] + assert type(list(xlsxinput.get_sheet_lines("main"))[0]["colInt"]) == int + assert type(list(xlsxinput.get_sheet_lines("main"))[0]["colFloat"]) == float + assert xlsxinput.sub_sheet_names == ["main"] + def test_ods_input_types(self): - odsinput = ODSInput(input_name='flattentool/tests/fixtures/ods/types.ods') + odsinput = ODSInput(input_name="flattentool/tests/fixtures/ods/types.ods") odsinput.read_sheets() - assert list(odsinput.get_sheet_lines('main')) == \ - [{ - 'colInt': 1, - 'colFloat': 1.2, - 'colDate': '2020-03-05', - 'colDateTime': '2020-02-07T16:41:00Z', - }] - assert type(list(odsinput.get_sheet_lines('main'))[0]['colInt']) == int - assert type(list(odsinput.get_sheet_lines('main'))[0]['colFloat']) == float - assert list(odsinput.sub_sheet_names) == ['main'] + assert list(odsinput.get_sheet_lines("main")) == [ + { + "colInt": 1, + "colFloat": 1.2, + "colDate": "2020-03-05", + "colDateTime": "2020-02-07T16:41:00Z", + } + ] + assert type(list(odsinput.get_sheet_lines("main"))[0]["colInt"]) == int + assert type(list(odsinput.get_sheet_lines("main"))[0]["colFloat"]) == float + assert list(odsinput.sub_sheet_names) == ["main"] def test_xlsx_input_integer2(self): - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/integer2.xlsx') + xlsxinput = XLSXInput( + input_name="flattentool/tests/fixtures/xlsx/integer2.xlsx" + ) xlsxinput.read_sheets() - assert list(xlsxinput.get_sheet_lines('Sheet1')) == \ - [{'activity-status/@code': 2}] + assert list(xlsxinput.get_sheet_lines("Sheet1")) == [ + {"activity-status/@code": 2} + ] # This is a float, but is converted to an int in the unflatten step, see # test_input_SpreadsheetInput_unflatten.py # 'Basic with float' - assert type(list(xlsxinput.get_sheet_lines('Sheet1'))[0]['activity-status/@code']) 
== float - assert xlsxinput.sub_sheet_names == ['Sheet1'] + assert ( + type(list(xlsxinput.get_sheet_lines("Sheet1"))[0]["activity-status/@code"]) + == float + ) + assert xlsxinput.sub_sheet_names == ["Sheet1"] def test_ods_input_integer2(self): - odsinput = ODSInput(input_name='flattentool/tests/fixtures/ods/integer2.ods') + odsinput = ODSInput(input_name="flattentool/tests/fixtures/ods/integer2.ods") odsinput.read_sheets() - assert list(odsinput.get_sheet_lines('Sheet1')) == \ - [{'activity-status/@code': 2}] - assert type(list(odsinput.get_sheet_lines('Sheet1'))[0]['activity-status/@code']) == int - assert list(odsinput.sub_sheet_names) == ['Sheet1'] + assert list(odsinput.get_sheet_lines("Sheet1")) == [ + {"activity-status/@code": 2} + ] + assert ( + type(list(odsinput.get_sheet_lines("Sheet1"))[0]["activity-status/@code"]) + == int + ) + assert list(odsinput.sub_sheet_names) == ["Sheet1"] def test_xlsx_input_formula(self): """ When a formula is present, we should use the value, rather than the formula itself. """ - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/formula.xlsx') + xlsxinput = XLSXInput(input_name="flattentool/tests/fixtures/xlsx/formula.xlsx") xlsxinput.read_sheets() - assert xlsxinput.sub_sheet_names == ['main', 'subsheet'] - assert list(xlsxinput.get_sheet_lines('main')) == \ - [{'colA': 1, 'colB': 2}, {'colA': 2, 'colB': 4}] - assert list(xlsxinput.get_sheet_lines('subsheet')) == \ - [{'colC': 3, 'colD': 9}, {'colC': 4, 'colD': 12}] + assert xlsxinput.sub_sheet_names == ["main", "subsheet"] + assert list(xlsxinput.get_sheet_lines("main")) == [ + {"colA": 1, "colB": 2}, + {"colA": 2, "colB": 4}, + ] + assert list(xlsxinput.get_sheet_lines("subsheet")) == [ + {"colC": 3, "colD": 9}, + {"colC": 4, "colD": 12}, + ] def test_bad_xlsx(self): """ XLSX file that is not an XLSX""" - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/file.xlsx') + xlsxinput = XLSXInput(input_name="flattentool/tests/fixtures/xlsx/file.xlsx") try: xlsxinput.read_sheets() except Exception as e: - assert str(e) == "The supplied file has extension .xlsx but isn't an XLSX file." + assert ( + str(e) + == "The supplied file has extension .xlsx but isn't an XLSX file." + ) return assert False, "No Exception Raised" @@ -217,31 +290,35 @@ def test_ods_input_formula(self): """ When a formula is present, we should use the value, rather than the formula itself. 
""" - odsinput = ODSInput(input_name='flattentool/tests/fixtures/ods/formula.ods') + odsinput = ODSInput(input_name="flattentool/tests/fixtures/ods/formula.ods") odsinput.read_sheets() - assert list(odsinput.sub_sheet_names) == ['main', 'subsheet'] - assert list(odsinput.get_sheet_lines('main')) == \ - [OrderedDict([('colA', 1), ('colB', 2)]), OrderedDict([('colA', 2), ('colB', 4)])] - assert list(odsinput.get_sheet_lines('subsheet')) == \ - [OrderedDict([('colC', 3), ('colD', 9)]), OrderedDict([('colC', 4), ('colD', 12)])] + assert list(odsinput.sub_sheet_names) == ["main", "subsheet"] + assert list(odsinput.get_sheet_lines("main")) == [ + OrderedDict([("colA", 1), ("colB", 2)]), + OrderedDict([("colA", 2), ("colB", 4)]), + ] + assert list(odsinput.get_sheet_lines("subsheet")) == [ + OrderedDict([("colC", 3), ("colD", 9)]), + OrderedDict([("colC", 4), ("colD", 12)]), + ] class TestInputFailure(object): def test_csv_no_directory(self): - csvinput = CSVInput(input_name='nonesensedirectory') + csvinput = CSVInput(input_name="nonesensedirectory") with pytest.raises(FileNotFoundError): csvinput.read_sheets() def test_xlsx_no_file(self, tmpdir): - xlsxinput = XLSXInput(input_name=tmpdir.join('test.xlsx').strpath) + xlsxinput = XLSXInput(input_name=tmpdir.join("test.xlsx").strpath) with pytest.raises(FileNotFoundError): xlsxinput.read_sheets() def test_ods_no_file(self, tmpdir): - odsinput = ODSInput(input_name=tmpdir.join('test.ods').strpath) - if sys.version > '3': + odsinput = ODSInput(input_name=tmpdir.join("test.ods").strpath) + if sys.version > "3": with pytest.raises(FileNotFoundError): odsinput.read_sheets() else: @@ -251,130 +328,173 @@ def test_ods_no_file(self, tmpdir): class TestUnicodeInput(object): def test_csv_input_utf8(self, tmpdir): - main = tmpdir.join('main.csv') - main.write_text('colA\néαГ😼𝒞人', encoding='utf8') + main = tmpdir.join("main.csv") + main.write_text("colA\néαГ😼𝒞人", encoding="utf8") csvinput = CSVInput(input_name=tmpdir.strpath) # defaults to utf8 csvinput.read_sheets() - assert list(csvinput.get_sheet_lines('main')) == \ - [{'colA': 'éαГ😼𝒞人'}] - assert csvinput.sub_sheet_names == ['main'] + assert list(csvinput.get_sheet_lines("main")) == [{"colA": "éαГ😼𝒞人"}] + assert csvinput.sub_sheet_names == ["main"] def test_csv_input_latin1(self, tmpdir): - main = tmpdir.join('main.csv') - main.write_text('colA\né', encoding='latin-1') + main = tmpdir.join("main.csv") + main.write_text("colA\né", encoding="latin-1") csvinput = CSVInput(input_name=tmpdir.strpath) - csvinput.encoding = 'latin-1' + csvinput.encoding = "latin-1" csvinput.read_sheets() - assert list(csvinput.get_sheet_lines('main')) == \ - [{'colA': 'é'}] - assert csvinput.sub_sheet_names == ['main'] + assert list(csvinput.get_sheet_lines("main")) == [{"colA": "é"}] + assert csvinput.sub_sheet_names == ["main"] @pytest.mark.xfail( sys.version_info < (3, 0), - reason='Python 2 CSV readers does not support UTF-16 (or any encodings with null bytes') + reason="Python 2 CSV readers does not support UTF-16 (or any encodings with null bytes", + ) def test_csv_input_utf16(self, tmpdir): - main = tmpdir.join('main.csv') - main.write_text('colA\néαГ😼𝒞人', encoding='utf16') + main = tmpdir.join("main.csv") + main.write_text("colA\néαГ😼𝒞人", encoding="utf16") csvinput = CSVInput(input_name=tmpdir.strpath) - csvinput.encoding = 'utf16' + csvinput.encoding = "utf16" csvinput.read_sheets() - assert list(csvinput.get_sheet_lines('main')) == \ - [{'colA': 'éαГ😼𝒞人'}] - assert csvinput.sub_sheet_names == ['main'] + assert 
def test_xlsx_input_utf8(self): """This is an xlsx file saved by OpenOffice. It seems to use UTF8 internally.""" - xlsxinput = XLSXInput(input_name='flattentool/tests/fixtures/xlsx/unicode.xlsx') + xlsxinput = XLSXInput(input_name="flattentool/tests/fixtures/xlsx/unicode.xlsx") xlsxinput.read_sheets() - assert list(xlsxinput.get_sheet_lines('main'))[0]['id'] == 'éαГ😼𝒞人' + assert list(xlsxinput.get_sheet_lines("main"))[0]["id"] == "éαГ😼𝒞人" def test_ods_input_utf8(self): """This is an ods file saved by OpenOffice. It seems to use UTF8 internally.""" - odsinput = ODSInput(input_name='flattentool/tests/fixtures/ods/unicode.ods') + odsinput = ODSInput(input_name="flattentool/tests/fixtures/ods/unicode.ods") odsinput.read_sheets() - assert list(odsinput.get_sheet_lines('main'))[0]['id'] == 'éαГ😼𝒞人' + assert list(odsinput.get_sheet_lines("main"))[0]["id"] == "éαГ😼𝒞人" def test_convert_type(recwarn): - si = SpreadsheetInput() - assert convert_type('', 'somestring') == 'somestring' + si = SpreadsheetInput() # noqa + assert convert_type("", "somestring") == "somestring" # If no type is specified, ints are kept as ints... - assert convert_type('', 3) == 3 + assert convert_type("", 3) == 3 # ... but all other objects are converted to strings class NotAString(object): def __str__(self): - return 'string representation' - assert NotAString() != 'string representation' - assert convert_type('', NotAString()) == 'string representation' - assert convert_type('string', NotAString()) == 'string representation' - - assert convert_type('string', 3) == '3' - assert convert_type('number', '3') == Decimal('3') - assert convert_type('number', '1.2') == Decimal('1.2') - assert convert_type('integer', '3') == 3 - assert convert_type('integer', 3) == 3 - - assert convert_type('boolean', 'TRUE') is True - assert convert_type('boolean', 'True') is True - assert convert_type('boolean', 1) is True - assert convert_type('boolean', '1') is True - assert convert_type('boolean', 'FALSE') is False - assert convert_type('boolean', 'False') is False - assert convert_type('boolean', 0) is False - assert convert_type('boolean', '0') is False - convert_type('boolean', 2) - assert 'Unrecognised value for boolean: "2"' in str(recwarn.pop(UserWarning).message) - convert_type('boolean', 'test') - assert 'Unrecognised value for boolean: "test"' in str(recwarn.pop(UserWarning).message) - - convert_type('integer', 'test') + return "string representation" + + assert NotAString() != "string representation" + assert convert_type("", NotAString()) == "string representation" + assert convert_type("string", NotAString()) == "string representation" + + assert convert_type("string", 3) == "3" + assert convert_type("number", "3") == Decimal("3") + assert convert_type("number", "1.2") == Decimal("1.2") + assert convert_type("integer", "3") == 3 + assert convert_type("integer", 3) == 3 + + assert convert_type("boolean", "TRUE") is True + assert convert_type("boolean", "True") is True + assert convert_type("boolean", 1) is True + assert convert_type("boolean", "1") is True + assert convert_type("boolean", "FALSE") is False + assert convert_type("boolean", "False") is False + assert convert_type("boolean", 0) is False + assert convert_type("boolean", "0") is False + convert_type("boolean", 2) + assert 'Unrecognised value for boolean: "2"' in str( + recwarn.pop(UserWarning).message + ) + convert_type("boolean", "test") + assert 'Unrecognised 
value for boolean: "test"' in str( + recwarn.pop(UserWarning).message + ) + + convert_type("integer", "test") assert 'Non-integer value "test"' in str(recwarn.pop(UserWarning).message) - convert_type('number', 'test') + convert_type("number", "test") assert 'Non-numeric value "test"' in str(recwarn.pop(UserWarning).message) - assert convert_type('string', '') is None - assert convert_type('number', '') is None - assert convert_type('integer', '') is None - assert convert_type('array', '') is None - assert convert_type('boolean', '') is None - assert convert_type('string', None) is None - assert convert_type('number', None) is None - assert convert_type('integer', None) is None - assert convert_type('array', None) is None - assert convert_type('boolean', None) is None - - for type_string in ['array', 'string_array', 'array_array', 'number_array']: - assert convert_type(type_string, 'one') == ['one'] - assert convert_type(type_string, 'one;two') == ['one', 'two'] - assert convert_type(type_string, 'one,two;three,four') == [['one', 'two'], ['three', 'four']] + assert convert_type("string", "") is None + assert convert_type("number", "") is None + assert convert_type("integer", "") is None + assert convert_type("array", "") is None + assert convert_type("boolean", "") is None + assert convert_type("string", None) is None + assert convert_type("number", None) is None + assert convert_type("integer", None) is None + assert convert_type("array", None) is None + assert convert_type("boolean", None) is None + + for type_string in ["array", "string_array", "array_array", "number_array"]: + assert convert_type(type_string, "one") == ["one"] + assert convert_type(type_string, "one;two") == ["one", "two"] + assert convert_type(type_string, "one,two;three,four") == [ + ["one", "two"], + ["three", "four"], + ] assert 'Non-numeric value "one"' in str(recwarn.pop(UserWarning).message) assert 'Non-numeric value "one;two"' in str(recwarn.pop(UserWarning).message) - assert 'Non-numeric value "one,two;three,four"' in str(recwarn.pop(UserWarning).message) - assert convert_type('number_array', '1') == [1] - assert convert_type('number_array', '1;2') == [1, 2] - assert convert_type('number_array', '1,2;3,4') == [[1, 2], [3, 4]] + assert 'Non-numeric value "one,two;three,four"' in str( + recwarn.pop(UserWarning).message + ) + assert convert_type("number_array", "1") == [1] + assert convert_type("number_array", "1;2") == [1, 2] + assert convert_type("number_array", "1,2;3,4") == [[1, 2], [3, 4]] with pytest.raises(ValueError) as e: - convert_type('notatype', 'test') + convert_type("notatype", "test") assert 'Unrecognised type: "notatype"' in str(e) - assert convert_type('string', datetime.datetime(2015, 1, 1)) == '2015-01-01T00:00:00+00:00' - assert convert_type('', datetime.datetime(2015, 1, 1)) == '2015-01-01T00:00:00+00:00' - assert convert_type('string', datetime.datetime(2015, 1, 1, 13, 37, 59)) == '2015-01-01T13:37:59+00:00' - assert convert_type('', datetime.datetime(2015, 1, 1, 13, 37, 59)) == '2015-01-01T13:37:59+00:00' - - timezone = pytz.timezone('Europe/London') - assert convert_type('string', datetime.datetime(2015, 1, 1), timezone) == '2015-01-01T00:00:00+00:00' - assert convert_type('', datetime.datetime(2015, 1, 1), timezone) == '2015-01-01T00:00:00+00:00' - assert convert_type('string', datetime.datetime(2015, 1, 1, 13, 37, 59), timezone) == '2015-01-01T13:37:59+00:00' - assert convert_type('', datetime.datetime(2015, 1, 1, 13, 37, 59), timezone) == '2015-01-01T13:37:59+00:00' - assert 
convert_type('string', datetime.datetime(2015, 6, 1), timezone) == '2015-06-01T00:00:00+01:00' - assert convert_type('', datetime.datetime(2015, 6, 1), timezone) == '2015-06-01T00:00:00+01:00' - assert convert_type('string', datetime.datetime(2015, 6, 1, 13, 37, 59), timezone) == '2015-06-01T13:37:59+01:00' - assert convert_type('', datetime.datetime(2015, 6, 1, 13, 37, 59), timezone) == '2015-06-01T13:37:59+01:00' + assert ( + convert_type("string", datetime.datetime(2015, 1, 1)) + == "2015-01-01T00:00:00+00:00" + ) + assert ( + convert_type("", datetime.datetime(2015, 1, 1)) == "2015-01-01T00:00:00+00:00" + ) + assert ( + convert_type("string", datetime.datetime(2015, 1, 1, 13, 37, 59)) + == "2015-01-01T13:37:59+00:00" + ) + assert ( + convert_type("", datetime.datetime(2015, 1, 1, 13, 37, 59)) + == "2015-01-01T13:37:59+00:00" + ) + + timezone = pytz.timezone("Europe/London") + assert ( + convert_type("string", datetime.datetime(2015, 1, 1), timezone) + == "2015-01-01T00:00:00+00:00" + ) + assert ( + convert_type("", datetime.datetime(2015, 1, 1), timezone) + == "2015-01-01T00:00:00+00:00" + ) + assert ( + convert_type("string", datetime.datetime(2015, 1, 1, 13, 37, 59), timezone) + == "2015-01-01T13:37:59+00:00" + ) + assert ( + convert_type("", datetime.datetime(2015, 1, 1, 13, 37, 59), timezone) + == "2015-01-01T13:37:59+00:00" + ) + assert ( + convert_type("string", datetime.datetime(2015, 6, 1), timezone) + == "2015-06-01T00:00:00+01:00" + ) + assert ( + convert_type("", datetime.datetime(2015, 6, 1), timezone) + == "2015-06-01T00:00:00+01:00" + ) + assert ( + convert_type("string", datetime.datetime(2015, 6, 1, 13, 37, 59), timezone) + == "2015-06-01T13:37:59+01:00" + ) + assert ( + convert_type("", datetime.datetime(2015, 6, 1, 13, 37, 59), timezone) + == "2015-06-01T13:37:59+01:00" + ) assert len(recwarn) == 0 diff --git a/flattentool/tests/test_input_SpreadsheetInput_unflatten.py b/flattentool/tests/test_input_SpreadsheetInput_unflatten.py index 4f1e2227..d9e1e1c7 100644 --- a/flattentool/tests/test_input_SpreadsheetInput_unflatten.py +++ b/flattentool/tests/test_input_SpreadsheetInput_unflatten.py @@ -5,20 +5,17 @@ """ from __future__ import unicode_literals -from .test_input_SpreadsheetInput import ListInput -from flattentool.schema import SchemaParser -from decimal import Decimal + from collections import OrderedDict -import sys + import pytest -import openpyxl -import datetime -import copy -ROOT_ID_TITLES = { - 'ocid': 'Open Contracting ID', - 'custom': 'Custom' -} +from flattentool.schema import SchemaParser + +from .test_input_SpreadsheetInput import ListInput + +ROOT_ID_TITLES = {"ocid": "Open Contracting ID", "custom": "Custom"} def inject_root_id(root_id, d): @@ -27,13 +24,13 @@ def inject_root_id(root_id, d): """ new_d = type(d)() for k, v in d.items(): - if k == 'ROOT_ID': - if root_id == '': + if k == "ROOT_ID": + if root_id == "": continue else: k = root_id - elif k == 'ROOT_ID_TITLE': - if root_id == '': + elif k == "ROOT_ID_TITLE": + if root_id == "": continue else: k = ROOT_ID_TITLES[root_id] @@ -41,1031 +38,768 @@ def inject_root_id(root_id, d): return new_d -UNICODE_TEST_STRING = 'éαГ😼𝒞人' +UNICODE_TEST_STRING = "éαГ😼𝒞人" # ROOT_ID will be replaced by the appropriate root_id name in the test (e.g.
ocid) testdata = [ ( - 'Basic flat', - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testA': 3 - }], - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testA': 3 - }], + "Basic flat", + [{"ROOT_ID": "1", "id": 2, "testA": 3}], + [{"ROOT_ID": "1", "id": 2, "testA": 3}], [], - True + True, ), ( - 'Basic with float', + "Basic with float", # 3.0 is converted to 3 # This is needed to handle google docs xlsx properly # https://github.com/OpenDataServices/cove/issues/838 - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testA': 3.0 - }], - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testA': 3 - }], + [{"ROOT_ID": "1", "id": 2, "testA": 3.0}], + [{"ROOT_ID": "1", "id": 2, "testA": 3}], [], - True + True, ), ( - 'Basic with zero', - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testA': 0 - }], - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testA': 0 - }], + "Basic with zero", + [{"ROOT_ID": "1", "id": 2, "testA": 0}], + [{"ROOT_ID": "1", "id": 2, "testA": 0}], [], - True + True, ), ( - 'Nested', - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testO/testB': 3, - 'testO/testC': 4, - }], - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testO': {'testB': 3, 'testC': 4} - }], + "Nested", + [{"ROOT_ID": "1", "id": 2, "testO/testB": 3, "testO/testC": 4,}], + [{"ROOT_ID": "1", "id": 2, "testO": {"testB": 3, "testC": 4}}], [], - True + True, ), ( - 'Unicode', - [{ - 'ROOT_ID': UNICODE_TEST_STRING, - 'testU': UNICODE_TEST_STRING - }], - [{ - 'ROOT_ID': UNICODE_TEST_STRING, - 'testU': UNICODE_TEST_STRING - }], + "Unicode", + [{"ROOT_ID": UNICODE_TEST_STRING, "testU": UNICODE_TEST_STRING}], + [{"ROOT_ID": UNICODE_TEST_STRING, "testU": UNICODE_TEST_STRING}], [], - True + True, ), ( - 'Single item array', - [{ - 'ROOT_ID': '1', - 'id': 2, - 'testL/0/id': 3, - 'testL/0/testB': 4 - }], - [{ - 'ROOT_ID': '1', 'id': 2, 'testL': [{ - 'id': 3, 'testB': 4 - }], - }], + "Single item array", + [{"ROOT_ID": "1", "id": 2, "testL/0/id": 3, "testL/0/testB": 4}], + [{"ROOT_ID": "1", "id": 2, "testL": [{"id": 3, "testB": 4}],}], [], False, ), ( - 'Single item array without parent ID', - [{ - 'ROOT_ID': '1', - 'testL/0/id': '2', - 'testL/0/testB': '3', - }], - [{ - 'ROOT_ID': '1', - 'testL': [{ - 'id': '2', - 'testB': '3' - }], - }], + "Single item array without parent ID", + [{"ROOT_ID": "1", "testL/0/id": "2", "testL/0/testB": "3",}], + [{"ROOT_ID": "1", "testL": [{"id": "2", "testB": "3"}],}], [], - False + False, ), ( - 'Empty', - [{ - 'ROOT_ID': '', - 'id': '', - 'testA': '', - 'testB': '', - 'testC': '', - 'testD': '', - 'testE': '', - }], + "Empty", + [ + { + "ROOT_ID": "", + "id": "", + "testA": "", + "testB": "", + "testC": "", + "testD": "", + "testE": "", + } + ], [], [], - False + False, ), ( - 'Empty except for root id', - [{ - 'ROOT_ID': 1, - 'id': '', - 'testA': '', - 'testB': '', - 'testC': '', - 'testD': '', - 'testE': '', - }], - [{ - 'ROOT_ID': 1 - }], + "Empty except for root id", + [ + { + "ROOT_ID": 1, + "id": "", + "testA": "", + "testB": "", + "testC": "", + "testD": "", + "testE": "", + } + ], + [{"ROOT_ID": 1}], [], - False + False, ), -# Previously this caused the error: TypeError: unorderable types: str() < int() -# Now one of the columns is ignored + # Previously this caused the error: TypeError: unorderable types: str() < int() + # Now one of the columns is ignored ( - 'Mismatch of object/array for field not in schema', - [OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest/a', 3), - ('newtest/0/a', 4), - ])], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'newtest': { - 'a': 3, - } - }], - ['Column newtest/0/a has been ignored, because it treats newtest as an array, but another column does 
not.'], - False + "Mismatch of object/array for field not in schema", + [ + OrderedDict( + [("ROOT_ID", 1), ("id", 2), ("newtest/a", 3), ("newtest/0/a", 4),] + ) + ], + [{"ROOT_ID": 1, "id": 2, "newtest": {"a": 3,}}], + [ + "Column newtest/0/a has been ignored, because it treats newtest as an array, but another column does not." + ], + False, ), -# Previously this caused the error: TypeError: unorderable types: str() < int() -# Now one of the columns is ignored + # Previously this caused the error: TypeError: unorderable types: str() < int() + # Now one of the columns is ignored ( - 'Mismatch of array/object for field not in schema', - [OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest/0/a', 4), - ('newtest/a', 3), - ])], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'newtest': [ - {'a': 4} - ] - }], - ['Column newtest/a has been ignored, because it treats newtest as an object, but another column does not.'], - False + "Mismatch of array/object for field not in schema", + [ + OrderedDict( + [("ROOT_ID", 1), ("id", 2), ("newtest/0/a", 4), ("newtest/a", 3),] + ) + ], + [{"ROOT_ID": 1, "id": 2, "newtest": [{"a": 4}]}], + [ + "Column newtest/a has been ignored, because it treats newtest as an object, but another column does not." + ], + False, ), -# Previously this caused the error: 'Cell' object has no attribute 'get' -# Now one of the columns is ignored + # Previously this caused the error: 'Cell' object has no attribute 'get' + # Now one of the columns is ignored ( - 'str / array mixing', - [OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest', 3), - ('newtest/0/a', 4), - ])], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'newtest': 3 - }], - ['Column newtest/0/a has been ignored, because it treats newtest as an array, but another column does not.'], - False + "str / array mixing", + [OrderedDict([("ROOT_ID", 1), ("id", 2), ("newtest", 3), ("newtest/0/a", 4),])], + [{"ROOT_ID": 1, "id": 2, "newtest": 3}], + [ + "Column newtest/0/a has been ignored, because it treats newtest as an array, but another column does not." + ], + False, ), ( - 'str / object mixing', - [OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest', 3), - ('newtest/a', 4), - ])], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'newtest': 3 - }], - ['Column newtest/a has been ignored, because it treats newtest as an object, but another column does not.'], - False + "str / object mixing", + [OrderedDict([("ROOT_ID", 1), ("id", 2), ("newtest", 3), ("newtest/a", 4),])], + [{"ROOT_ID": 1, "id": 2, "newtest": 3}], + [ + "Column newtest/a has been ignored, because it treats newtest as an object, but another column does not." 
+ ], + False, ), ( - 'array / str mixing', - [OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('nest/newtest/0/a', 3), - ('nest/newtest', 4), - ])], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'nest': { - 'newtest': [{ - 'a': 3 - }] - } - }], - ['Column nest/newtest has been ignored, because another column treats it as an array or object'], - False + "array / str mixing", + [ + OrderedDict( + [ + ("ROOT_ID", 1), + ("id", 2), + ("nest/newtest/0/a", 3), + ("nest/newtest", 4), + ] + ) + ], + [{"ROOT_ID": 1, "id": 2, "nest": {"newtest": [{"a": 3}]}}], + [ + "Column nest/newtest has been ignored, because another column treats it as an array or object" + ], + False, ), ( - 'object / str mixing', - [OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest/a', 3), - ('newtest', 4), - ])], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'newtest': { - 'a': 3 - } - }], - ['Column newtest has been ignored, because another column treats it as an array or object'], - False + "object / str mixing", + [OrderedDict([("ROOT_ID", 1), ("id", 2), ("newtest/a", 3), ("newtest", 4),])], + [{"ROOT_ID": 1, "id": 2, "newtest": {"a": 3}}], + [ + "Column newtest has been ignored, because another column treats it as an array or object" + ], + False, ), ( - 'Mismatch of object/array for field not in schema (multiline)', + "Mismatch of object/array for field not in schema (multiline)", [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('nest/newtest/a', 3), - ]), - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('nest/newtest/0/a', 4), - ]), + OrderedDict([("ROOT_ID", 1), ("id", 2), ("nest/newtest/a", 3),]), + OrderedDict([("ROOT_ID", 1), ("id", 2), ("nest/newtest/0/a", 4),]), ], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'nest': { - 'newtest': { - 'a': 3, - } - } - }], - ['Column nest/newtest/0/a has been ignored, because it treats newtest as an array, but another column does not'], - False + [{"ROOT_ID": 1, "id": 2, "nest": {"newtest": {"a": 3,}}}], + [ + "Column nest/newtest/0/a has been ignored, because it treats newtest as an array, but another column does not" + ], + False, ), -# Previously this caused the error: TypeError: unorderable types: str() < int() -# Now one of the columns is ignored + # Previously this caused the error: TypeError: unorderable types: str() < int() + # Now one of the columns is ignored ( - 'Mismatch of array/object for field not in schema (multiline)', + "Mismatch of array/object for field not in schema (multiline)", [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest/0/a', 4), - ]), - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest/a', 3), - ]) + OrderedDict([("ROOT_ID", 1), ("id", 2), ("newtest/0/a", 4),]), + OrderedDict([("ROOT_ID", 1), ("id", 2), ("newtest/a", 3),]), ], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'newtest': [ - {'a': 4} - ] - }], - ['Column newtest/a has been ignored, because it treats newtest as an object, but another column does not'], - False + [{"ROOT_ID": 1, "id": 2, "newtest": [{"a": 4}]}], + [ + "Column newtest/a has been ignored, because it treats newtest as an object, but another column does not" + ], + False, ), -# Previously this caused the error: 'Cell' object has no attribute 'get' -# Now one of the columns is ignored + # Previously this caused the error: 'Cell' object has no attribute 'get' + # Now one of the columns is ignored ( - 'str / array mixing multiline', + "str / array mixing multiline", [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('nest/newtest', 3), - ]), - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('nest/newtest/0/a', 4), - ('nest/newtest/0/b', 5), - ]), + 
OrderedDict([("ROOT_ID", 1), ("id", 2), ("nest/newtest", 3),]), + OrderedDict( + [ + ("ROOT_ID", 1), + ("id", 2), + ("nest/newtest/0/a", 4), + ("nest/newtest/0/b", 5), + ] + ), ], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'nest': { - 'newtest': 3 - } - }], + [{"ROOT_ID": 1, "id": 2, "nest": {"newtest": 3}}], [ - 'Column nest/newtest/0/a has been ignored, because it treats newtest as an array, but another column does not', - 'Column nest/newtest/0/b has been ignored, because it treats newtest as an array, but another column does not', + "Column nest/newtest/0/a has been ignored, because it treats newtest as an array, but another column does not", + "Column nest/newtest/0/b has been ignored, because it treats newtest as an array, but another column does not", ], - False + False, ), ( - 'array / str mixing multiline', + "array / str mixing multiline", # same as above, but with rows switched [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('nest/newtest/0/a', 4), - ]), - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('nest/newtest', 3), - ]), + OrderedDict([("ROOT_ID", 1), ("id", 2), ("nest/newtest/0/a", 4),]), + OrderedDict([("ROOT_ID", 1), ("id", 2), ("nest/newtest", 3),]), ], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'nest': { - 'newtest': [ - {'a': 4} - ] - } - }], - ['Column nest/newtest has been ignored, because another column treats it as an array or object'], - False + [{"ROOT_ID": 1, "id": 2, "nest": {"newtest": [{"a": 4}]}}], + [ + "Column nest/newtest has been ignored, because another column treats it as an array or object" + ], + False, ), -# WARNING: Conflict when merging field "newtest" for id "2" in sheet custom_main: "3" + # WARNING: Conflict when merging field "newtest" for id "2" in sheet custom_main: "3" ( - 'str / object mixing multiline', + "str / object mixing multiline", [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest', 3), - ]), - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest/a', 4), - ('newtest/b', 5), - ]) + OrderedDict([("ROOT_ID", 1), ("id", 2), ("newtest", 3),]), + OrderedDict( + [("ROOT_ID", 1), ("id", 2), ("newtest/a", 4), ("newtest/b", 5),] + ), ], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'newtest': 3 - }], + [{"ROOT_ID": 1, "id": 2, "newtest": 3}], [ - 'Column newtest/a has been ignored, because it treats newtest as an object, but another column does not', - 'Column newtest/b has been ignored, because it treats newtest as an object, but another column does not', + "Column newtest/a has been ignored, because it treats newtest as an object, but another column does not", + "Column newtest/b has been ignored, because it treats newtest as an object, but another column does not", ], - False + False, ), ( - 'object / str mixing multiline', + "object / str mixing multiline", [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest/a', 4), - ]), - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('newtest', 3), - ]) + OrderedDict([("ROOT_ID", 1), ("id", 2), ("newtest/a", 4),]), + OrderedDict([("ROOT_ID", 1), ("id", 2), ("newtest", 3),]), ], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'newtest': { - 'a': 4 - } - }], - ['Column newtest has been ignored, because another column treats it as an array or object'], - False + [{"ROOT_ID": 1, "id": 2, "newtest": {"a": 4}}], + [ + "Column newtest has been ignored, because another column treats it as an array or object" + ], + False, ), -# Previously this caused the error: KeyError('ocid',) -# Now it works, but probably not as intended -# The missing Root ID should be picked up in schema validation -# (Cove will do this 
automatically). + # Previously this caused the error: KeyError('ocid',) + # Now it works, but probably not as intended + # The missing Root ID should be picked up in schema validation + # (Cove will do this automatically). ( - 'Root ID is missing', - [OrderedDict([ - ('id', 2), - ('testA', 3), - ])], - [{ - 'id': 2, - 'testA': 3 - }], + "Root ID is missing", + [OrderedDict([("id", 2), ("testA", 3),])], + [{"id": 2, "testA": 3}], [], - False + False, ), -# We should be able to handle numbers as column headings + # We should be able to handle numbers as column headings ( - 'Non-string column headings', - [OrderedDict([ - (1, 'A'), - (2, 'AA'), - ('3', 'AAA'), - ('4', 'AAAA'), - (Decimal('2.2'), 'B'), - (2.3, 'C'), - (False, 'D'), - ])], - [{ - '2.2': 'B', - '2.3': 'C', - 'False': 'D', - }], + "Non-string column headings", + [ + OrderedDict( + [ + (1, "A"), + (2, "AA"), + ("3", "AAA"), + ("4", "AAAA"), + (Decimal("2.2"), "B"), + (2.3, "C"), + (False, "D"), + ] + ) + ], + [{"2.2": "B", "2.3": "C", "False": "D",}], [ 'Column "1" has been ignored because it is a number.', 'Column "2" has been ignored because it is a number.', 'Column "3" has been ignored because it is a number.', 'Column "4" has been ignored because it is a number.', ], - False - ) + False, + ), ] # Test cases that require our schema aware JSON pointer logic, so must be run # with the relevant schema testdata_pointer = [ ( - 'Single item array without json numbering', - [{ - 'ROOT_ID': '1', - 'testR/id': '2', - 'testR/testB': '3', - 'testR/testX': '3', - }], - [{ - 'ROOT_ID': '1', - 'testR': [{ - 'id': '2', - 'testB': '3', - 'testX': '3' - }], - }], - [] + "Single item array without json numbering", + [{"ROOT_ID": "1", "testR/id": "2", "testR/testB": "3", "testR/testX": "3",}], + [{"ROOT_ID": "1", "testR": [{"id": "2", "testB": "3", "testX": "3"}],}], + [], ), ( - 'Multi item array one with varied numbering ', - [{ - 'ROOT_ID': '1', - 'testR/id': '-1', - 'testR/testB': '-1', - 'testR/testX': '-2', - 'testR/0/id': '0', - 'testR/0/testB': '1', - 'testR/0/testX': '1', - 'testR/5/id': '5', - 'testR/5/testB': '5', - 'testR/5/testX': '6', - }], - [{ - 'ROOT_ID': '1', - 'testR': [{ - 'id': '-1', - 'testB': '-1', - 'testX': '-2' - }, + "Multi item array one with varied numbering ", + [ { - 'id': '0', - 'testB': '1', - 'testX': '1' - }, + "ROOT_ID": "1", + "testR/id": "-1", + "testR/testB": "-1", + "testR/testX": "-2", + "testR/0/id": "0", + "testR/0/testB": "1", + "testR/0/testX": "1", + "testR/5/id": "5", + "testR/5/testB": "5", + "testR/5/testX": "6", + } + ], + [ { - 'id': '5', - 'testB': '5', - 'testX': '6' + "ROOT_ID": "1", + "testR": [ + {"id": "-1", "testB": "-1", "testX": "-2"}, + {"id": "0", "testB": "1", "testX": "1"}, + {"id": "5", "testB": "5", "testX": "6"}, + ], } - ] - }], - [] + ], + [], ), ] + def create_schema(root_id): schema = { - 'properties': { - 'id': { - 'title': 'Identifier', - 'type': 'integer', - }, - 'testA': { - 'title': 'A title', - 'type': 'integer', - }, - 'testB': { - 'title': 'B title', - 'type': 'object', - 'properties': { - 'id': { - 'title': 'Identifier', - 'type': 'integer', - }, - 'testC': { - 'title': 'C title', - 'type': 'integer', - }, - 'testD': { - 'title': 'D title', - 'type': 'integer', + "properties": { + "id": {"title": "Identifier", "type": "integer",}, + "testA": {"title": "A title", "type": "integer",}, + "testB": { + "title": "B title", + "type": "object", + "properties": { + "id": {"title": "Identifier", "type": "integer",}, + "testC": {"title": "C title", "type": "integer",}, + 
"testD": {"title": "D title", "type": "integer",}, + "subField": { + "title": "Sub title", + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"title": "Identifier", "type": "integer",}, + "testE": {"title": "E title", "type": "integer",}, + }, + }, }, - 'subField': { - 'title': 'Sub title', - 'type': 'array', - 'items': { - 'type': 'object', - 'properties': { - 'id': { - 'title': 'Identifier', - 'type': 'integer', - }, - 'testE': { - 'title': 'E title', - 'type': 'integer', - }, - } - } - } - } + }, }, - 'testArr': { - 'title': 'Arr title', - 'type': 'array', - 'items': { - 'type': 'object', - 'properties': { - 'id': { - 'title': 'Identifier', - 'type': 'string', - }, - 'testB': { - 'title': 'B title', - 'type': 'string', - }, - 'testC': { - 'title': 'C title', - 'type': 'string', - }, - 'testNest': { - 'title': 'Nest title', - 'type': 'array', - 'items': { - 'type': 'object', - 'properties': { - 'id': { - 'title': 'Identifier', - 'type': 'string', - }, - 'testD': { - 'title': 'D title', - 'type': 'string', - }, - } - } - }, - 'testNestObj': { - 'title': 'NestObj title', - 'type': 'object', - 'properties': { - 'id': { - 'title': 'Identifier', - 'type': 'string', - }, - 'testD': { - 'title': 'D title', - 'type': 'string', + "testArr": { + "title": "Arr title", + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"title": "Identifier", "type": "string",}, + "testB": {"title": "B title", "type": "string",}, + "testC": {"title": "C title", "type": "string",}, + "testNest": { + "title": "Nest title", + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"title": "Identifier", "type": "string",}, + "testD": {"title": "D title", "type": "string",}, }, - } + }, + }, + "testNestObj": { + "title": "NestObj title", + "type": "object", + "properties": { + "id": {"title": "Identifier", "type": "string",}, + "testD": {"title": "D title", "type": "string",}, + }, }, - } - } + }, + }, }, - 'testR': { - 'title': 'R title', - 'type': 'array', - 'rollUp': ['id', 'testB'], - 'items': { - 'type': 'object', - 'properties': { - 'id': { - 'title': 'Identifier', - 'type': 'string', + "testR": { + "title": "R title", + "type": "array", + "rollUp": ["id", "testB"], + "items": { + "type": "object", + "properties": { + "id": { + "title": "Identifier", + "type": "string", # 'type': 'integer', # integer does not work, as testB:integer is not # in the rollUp }, - 'testB': { - 'title': 'B title', - 'type': 'string', - }, - 'testC': { - 'title': 'C title', - 'type': 'string', - }, - 'testSA': { - 'title': 'SA title', - 'type': 'array', - 'items': { - 'type': 'string' - } + "testB": {"title": "B title", "type": "string",}, + "testC": {"title": "C title", "type": "string",}, + "testSA": { + "title": "SA title", + "type": "array", + "items": {"type": "string"}, }, - } - } + }, + }, }, - 'testU': { - 'title': UNICODE_TEST_STRING, - 'type': 'string', + "testU": {"title": UNICODE_TEST_STRING, "type": "string",}, + "testSA": { + "title": "SA title", + "type": "array", + "items": {"type": "string"}, }, - 'testSA': { - 'title': 'SA title', - 'type': 'array', - 'items': { - 'type': 'string' - } - } } } if root_id: - schema.update({ - root_id: { - 'title': ROOT_ID_TITLES[root_id], - 'type': 'string' - } - }) + schema.update({root_id: {"title": ROOT_ID_TITLES[root_id], "type": "string"}}) return schema + testdata_titles = [ ( - 'Basic flat', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'A title': 3 - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testA': 3 - }], 
+ "Basic flat", + [{"ROOT_ID_TITLE": 1, "Identifier": 2, "A title": 3}], + [{"ROOT_ID": 1, "id": 2, "testA": 3}], [], - True + True, ), ( - 'Nested', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'B title:C title': 3, - 'B title:D title': 4, - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testB': {'testC': 3, 'testD': 4} - }], + "Nested", + [ + { + "ROOT_ID_TITLE": 1, + "Identifier": 2, + "B title:C title": 3, + "B title:D title": 4, + } + ], + [{"ROOT_ID": 1, "id": 2, "testB": {"testC": 3, "testD": 4}}], [], - True + True, ), ( - 'Nested titles should be converted individually', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'B title:C title': 3, - 'B title:Not in schema': 4, - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testB': {'testC': 3, 'Not in schema': 4} - }], + "Nested titles should be converted individually", + [ + { + "ROOT_ID_TITLE": 1, + "Identifier": 2, + "B title:C title": 3, + "B title:Not in schema": 4, + } + ], + [{"ROOT_ID": 1, "id": 2, "testB": {"testC": 3, "Not in schema": 4}}], [], - False + False, ), ( - 'Should be space and case invariant', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'B title : c title': 3, - 'btitle : Not in schema': 4, - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testB': {'testC': 3, 'Not in schema': 4} - }], + "Should be space and case invariant", + [ + { + "ROOT_ID_TITLE": 1, + "Identifier": 2, + "B title : c title": 3, + "btitle : Not in schema": 4, + } + ], + [{"ROOT_ID": 1, "id": 2, "testB": {"testC": 3, "Not in schema": 4}}], [], - False + False, ), ( - 'Unicode', - [{ - 'ROOT_ID_TITLE': UNICODE_TEST_STRING, - UNICODE_TEST_STRING: UNICODE_TEST_STRING - }], - [{ - 'ROOT_ID': UNICODE_TEST_STRING, - 'testU': UNICODE_TEST_STRING - }], + "Unicode", + [ + { + "ROOT_ID_TITLE": UNICODE_TEST_STRING, + UNICODE_TEST_STRING: UNICODE_TEST_STRING, + } + ], + [{"ROOT_ID": UNICODE_TEST_STRING, "testU": UNICODE_TEST_STRING}], [], - True + True, ), - ( - 'Single item array', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'R title:Identifier': 3, - 'R title:B title': 4 - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testR': [{ - 'id': '3', 'testB': '4' - }], - }], + ( + "Single item array", + [ + { + "ROOT_ID_TITLE": 1, + "Identifier": 2, + "R title:Identifier": 3, + "R title:B title": 4, + } + ], + [{"ROOT_ID": 1, "id": 2, "testR": [{"id": "3", "testB": "4"}],}], [], - False + False, ), ( - 'Single item array without parent ID', - [{ - 'ROOT_ID_TITLE': '1', - 'R title:Identifier': '2', - 'R title:B title': '3' - }], - [{ - 'ROOT_ID': '1', - 'testR': [{ - 'id': '2', - 'testB': '3' - }], - }], + "Single item array without parent ID", + [{"ROOT_ID_TITLE": "1", "R title:Identifier": "2", "R title:B title": "3"}], + [{"ROOT_ID": "1", "testR": [{"id": "2", "testB": "3"}],}], [], - False + False, ), ( - ''' + """ Properties of a single item array shouldn't need to be in rollUp list for their titles to be converted - ''', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'R title:Identifier': 3, - 'R title:C title': 4 - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testR': [{ - 'id': '3', - 'testC': '4' - }], - }], + """, + [ + { + "ROOT_ID_TITLE": 1, + "Identifier": 2, + "R title:Identifier": 3, + "R title:C title": 4, + } + ], + [{"ROOT_ID": 1, "id": 2, "testR": [{"id": "3", "testC": "4"}],}], [], - False + False, ), ( - 'Single item array, titles should be converted individually', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'R title:C title': 3, - 'R title:Not in schema': 4, - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testR': [{ - 'testC': '3', - 'Not in schema': 4 - }], - }], + "Single 
item array, titles should be converted individually", + [ + { + "ROOT_ID_TITLE": 1, + "Identifier": 2, + "R title:C title": 3, + "R title:Not in schema": 4, + } + ], + [{"ROOT_ID": 1, "id": 2, "testR": [{"testC": "3", "Not in schema": 4}],}], [], - False + False, ), ( - 'Multi item array, allow numbering', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'R title:C title': 3, - 'R title:Not in schema': 4, - 'R title:0:C title': 5, - 'R title:0:Not in schema': 6, - 'R title:5:C title': 7, - 'R title:5:Not in schema': 8, - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testR': [{ - 'testC': '3', - 'Not in schema': 4 - }, + "Multi item array, allow numbering", + [ { - 'testC': '5', - 'Not in schema': 6 - }, + "ROOT_ID_TITLE": 1, + "Identifier": 2, + "R title:C title": 3, + "R title:Not in schema": 4, + "R title:0:C title": 5, + "R title:0:Not in schema": 6, + "R title:5:C title": 7, + "R title:5:Not in schema": 8, + } + ], + [ { - 'testC': '7', - 'Not in schema': 8 + "ROOT_ID": 1, + "id": 2, + "testR": [ + {"testC": "3", "Not in schema": 4}, + {"testC": "5", "Not in schema": 6}, + {"testC": "7", "Not in schema": 8}, + ], } - ] - }], + ], [], - False + False, ), ( - 'Empty', - [{ - 'ROOT_ID_TITLE': '', - 'Identifier': '', - 'A title': '', - 'B title': '', - 'C title': '', - 'D title': '', - 'E title': '', - }], + "Empty", + [ + { + "ROOT_ID_TITLE": "", + "Identifier": "", + "A title": "", + "B title": "", + "C title": "", + "D title": "", + "E title": "", + } + ], [], [], - False + False, ), ( - 'Empty except for root id', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': '', - 'A title': '', - 'B title': '', - 'C title': '', - 'D title': '', - 'E title': '', - }], - [{ - 'ROOT_ID': 1 - }], + "Empty except for root id", + [ + { + "ROOT_ID_TITLE": 1, + "Identifier": "", + "A title": "", + "B title": "", + "C title": "", + "D title": "", + "E title": "", + } + ], + [{"ROOT_ID": 1}], [], - False + False, ), ( - 'Test arrays of strings (1 item)', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'SA title': 'a', - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testSA': [ 'a' ], - }], + "Test arrays of strings (1 item)", + [{"ROOT_ID_TITLE": 1, "Identifier": 2, "SA title": "a",}], + [{"ROOT_ID": 1, "id": 2, "testSA": ["a"],}], [], - True + True, ), ( - 'Test arrays of strings (2 items)', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'SA title': 'a;b', - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testSA': [ 'a', 'b' ], - }], + "Test arrays of strings (2 items)", + [{"ROOT_ID_TITLE": 1, "Identifier": 2, "SA title": "a;b",}], + [{"ROOT_ID": 1, "id": 2, "testSA": ["a", "b"],}], [], - True + True, ), ( - 'Test arrays of strings within an object array (1 item)', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'R title:SA title': 'a', - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testR': [{ - 'testSA': [ 'a' ], - }] - }], + "Test arrays of strings within an object array (1 item)", + [{"ROOT_ID_TITLE": 1, "Identifier": 2, "R title:SA title": "a",}], + [{"ROOT_ID": 1, "id": 2, "testR": [{"testSA": ["a"],}]}], [], - False + False, ), ( - 'Test arrays of strings within an object array (2 items)', - [{ - 'ROOT_ID_TITLE': 1, - 'Identifier': 2, - 'R title:SA title': 'a;b', - }], - [{ - 'ROOT_ID': 1, - 'id': 2, - 'testR': [{ - 'testSA': [ 'a', 'b' ], - }] - }], + "Test arrays of strings within an object array (2 items)", + [{"ROOT_ID_TITLE": 1, "Identifier": 2, "R title:SA title": "a;b",}], + [{"ROOT_ID": 1, "id": 2, "testR": [{"testSA": ["a", "b"],}]}], [], - False + False, ), ] -ROOT_ID_PARAMS = [ - ('ocid', {}), # If not root_id kwarg is 
passed, then a root_id of ocid is assumed - ('ocid', {'root_id': 'ocid'}), - ('custom', {'root_id': 'custom'}), - ('', {'root_id': ''}) - ] +ROOT_ID_PARAMS = [ + ("ocid", {}), # If no root_id kwarg is passed, then a root_id of ocid is assumed + ("ocid", {"root_id": "ocid"}), + ("custom", {"root_id": "custom"}), + ("", {"root_id": ""}), +] # Since we're not using titles, and titles mode should fall back to assuming # we've supplied a fieldname, we should be able to run this test with # convert_titles and use_schema as True or False -@pytest.mark.parametrize('convert_titles', [True, False]) -@pytest.mark.parametrize('use_schema', [True, False]) -@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS) -@pytest.mark.parametrize('comment,input_list,expected_output_list,warning_messages,reversible', testdata) -def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_list, expected_output_list, recwarn, comment, warning_messages, reversible): +@pytest.mark.parametrize("convert_titles", [True, False]) +@pytest.mark.parametrize("use_schema", [True, False]) +@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS) +@pytest.mark.parametrize( + "comment,input_list,expected_output_list,warning_messages,reversible", testdata +) +def test_unflatten( + convert_titles, + use_schema, + root_id, + root_id_kwargs, + input_list, + expected_output_list, + recwarn, + comment, + warning_messages, + reversible, +): # Not sure why, but this seems to be necessary to have warnings picked up # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it import warnings - warnings.simplefilter('always') - extra_kwargs = {'convert_titles': convert_titles} + warnings.simplefilter("always") + + extra_kwargs = {"convert_titles": convert_titles} extra_kwargs.update(root_id_kwargs) spreadsheet_input = ListInput( sheets={ - 'custom_main': [ + "custom_main": [ inject_root_id(root_id, input_row) for input_row in input_list ] }, - **extra_kwargs) + **extra_kwargs + ) spreadsheet_input.read_sheets() parser = SchemaParser( root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}}, root_id=root_id, - rollup=True + rollup=True, ) parser.parse() spreadsheet_input.parser = parser expected_output_list = [ - inject_root_id(root_id, expected_output_dict) for expected_output_dict in expected_output_list + inject_root_id(root_id, expected_output_dict) + for expected_output_dict in expected_output_list ] if expected_output_list == [{}]: # We don't expect an empty dictionary expected_output_list = [] assert list(spreadsheet_input.unflatten()) == expected_output_list # We expect no warning_messages - if not convert_titles: # TODO what are the warning_messages here + if not convert_titles: # TODO what are the warning_messages here assert [str(x.message) for x in recwarn.list] == warning_messages -@pytest.mark.parametrize('convert_titles', [True, False]) -@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS) -@pytest.mark.parametrize('comment,input_list,expected_output_list,warning_messages', testdata_pointer) -def test_unflatten_pointer(convert_titles, root_id, root_id_kwargs, input_list, expected_output_list, recwarn, comment, warning_messages): - return test_unflatten(convert_titles=convert_titles, use_schema=True, root_id=root_id, root_id_kwargs=root_id_kwargs, input_list=input_list, expected_output_list=expected_output_list, recwarn=recwarn, comment=comment, warning_messages=warning_messages, reversible=False) +@pytest.mark.parametrize("convert_titles", [True, 
False]) +@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS) +@pytest.mark.parametrize( + "comment,input_list,expected_output_list,warning_messages", testdata_pointer +) +def test_unflatten_pointer( + convert_titles, + root_id, + root_id_kwargs, + input_list, + expected_output_list, + recwarn, + comment, + warning_messages, +): + return test_unflatten( + convert_titles=convert_titles, + use_schema=True, + root_id=root_id, + root_id_kwargs=root_id_kwargs, + input_list=input_list, + expected_output_list=expected_output_list, + recwarn=recwarn, + comment=comment, + warning_messages=warning_messages, + reversible=False, + ) -@pytest.mark.parametrize('comment,input_list,expected_output_list,warning_messages,reversible', testdata_titles) -@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS) -def test_unflatten_titles(root_id, root_id_kwargs, input_list, expected_output_list, recwarn, comment, warning_messages, reversible): +@pytest.mark.parametrize( + "comment,input_list,expected_output_list,warning_messages,reversible", + testdata_titles, +) +@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS) +def test_unflatten_titles( + root_id, + root_id_kwargs, + input_list, + expected_output_list, + recwarn, + comment, + warning_messages, + reversible, +): """ Essentially the same as test unflatten, except that convert_titles and use_schema are always true, as both of these are needed to convert titles properly. (and runs with different test data). """ - if root_id != '': + if root_id != "": # Skip all tests with a root ID for now, as this is broken # https://github.com/OpenDataServices/flatten-tool/issues/84 pytest.skip() - return test_unflatten(convert_titles=True, use_schema=True, root_id=root_id, root_id_kwargs=root_id_kwargs, input_list=input_list, expected_output_list=expected_output_list, recwarn=recwarn, comment=comment, warning_messages=warning_messages, reversible=reversible) - - + return test_unflatten( + convert_titles=True, + use_schema=True, + root_id=root_id, + root_id_kwargs=root_id_kwargs, + input_list=input_list, + expected_output_list=expected_output_list, + recwarn=recwarn, + comment=comment, + warning_messages=warning_messages, + reversible=reversible, + ) diff --git a/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py b/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py index fb5b4e0f..430ca8e6 100644 --- a/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py +++ b/flattentool/tests/test_input_SpreadsheetInput_unflatten_mulitplesheets.py @@ -5,784 +5,731 @@ Tests that only apply for multiple sheets. 
""" from __future__ import unicode_literals -from .test_input_SpreadsheetInput import ListInput -from .test_input_SpreadsheetInput_unflatten import ROOT_ID_PARAMS, create_schema, inject_root_id -from flattentool.schema import SchemaParser -from decimal import Decimal + from collections import OrderedDict -import sys + import pytest -import openpyxl -import datetime +from flattentool.schema import SchemaParser + +from .test_input_SpreadsheetInput import ListInput +from .test_input_SpreadsheetInput_unflatten import ( + ROOT_ID_PARAMS, + create_schema, + inject_root_id, +) testdata_multiplesheets = [ ( - 'Basic sub sheet', + "Basic sub sheet", { - 'custom_main': [ - { - 'ROOT_ID': 1, - 'id': 2, - }, - { - 'ROOT_ID': 1, - 'id': 3, - } + "custom_main": [{"ROOT_ID": 1, "id": 2,}, {"ROOT_ID": 1, "id": 3,}], + "testArr": [ + {"ROOT_ID": 1, "id": 2, "testArr/0/testC": "3",}, + {"ROOT_ID": 1, "id": 2, "testArr/0/testC": "4",}, ], - 'testArr': [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testArr/0/testC': '3', - }, - { - 'ROOT_ID': 1, - 'id': 2, - 'testArr/0/testC': '4', - } - ] }, [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testArr': [ - {'testC': '3'}, - {'testC': '4'}, - ] - }, - { - 'ROOT_ID': 1, - 'id': 3 - } + {"ROOT_ID": 1, "id": 2, "testArr": [{"testC": "3"}, {"testC": "4"},]}, + {"ROOT_ID": 1, "id": 3}, ], [], - True + True, ), ( - 'Nested sub sheet (with id)', + "Nested sub sheet (with id)", { - 'custom_main': [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testB/id': 3, - 'testB/testC': 4, - } - ], - 'tes_subField': [ + "custom_main": [{"ROOT_ID": 1, "id": 2, "testB/id": 3, "testB/testC": 4,}], + "tes_subField": [ # It used to be neccesary to supply testA/id in this # situation, but now it's optional - { - 'ROOT_ID': 1, - 'id': 2, - 'testB/id': 3, - 'testB/subField/0/testD': 5, - } - ] + {"ROOT_ID": 1, "id": 2, "testB/id": 3, "testB/subField/0/testD": 5,} + ], }, [ - {'ROOT_ID': 1, 'id': 2, 'testB': { - 'id': 3, - 'testC': 4, - 'subField': [{'testD': 5}] - }} + { + "ROOT_ID": 1, + "id": 2, + "testB": {"id": 3, "testC": 4, "subField": [{"testD": 5}]}, + } ], [], - True + True, ), ( - 'Nested sub sheet (without id)', + "Nested sub sheet (without id)", { - 'custom_main': [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testB/id': 3, - 'testB/testC': 4, - } - ], - 'sub': [ + "custom_main": [{"ROOT_ID": 1, "id": 2, "testB/id": 3, "testB/testC": 4,}], + "sub": [ # It used to be neccesary to supply testA/id in this # situation, but now it's optional - { - 'ROOT_ID': 1, - 'id': 2, - 'testB/subField/0/testD': 5, - } - ] + {"ROOT_ID": 1, "id": 2, "testB/subField/0/testD": 5,} + ], }, [ - {'ROOT_ID': 1, 'id': 2, 'testB': { - 'id': 3, - 'testC': 4, - 'subField': [{'testD': 5}] - }} + { + "ROOT_ID": 1, + "id": 2, + "testB": {"id": 3, "testC": 4, "subField": [{"testD": 5}]}, + } ], [], - False + False, ), ( - 'Basic two sub sheets', - OrderedDict([ - ('custom_main', [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ]), - OrderedDict([ - ('ROOT_ID', 1), - ('id', 6), - ]) - ]), - ('sub1Field', [ - { - 'ROOT_ID': 1, - 'id': 2, - 'sub1Field/0/id': 3, - 'sub1Field/0/testA': 4, - } - ]), - ('sub_sub2Field', [ - { - 'ROOT_ID': 1, - 'id': 2, - 'sub1Field/0/id': 3, - 'sub1Field/0/sub2Field/0/testB': 5, - } - ]) - ]), + "Basic two sub sheets", + OrderedDict( + [ + ( + "custom_main", + [ + OrderedDict([("ROOT_ID", 1), ("id", 2),]), + OrderedDict([("ROOT_ID", 1), ("id", 6),]), + ], + ), + ( + "sub1Field", + [ + { + "ROOT_ID": 1, + "id": 2, + "sub1Field/0/id": 3, + "sub1Field/0/testA": 4, + } + ], + ), + ( + "sub_sub2Field", + [ + { + "ROOT_ID": 1, + "id": 
2, + "sub1Field/0/id": 3, + "sub1Field/0/sub2Field/0/testB": 5, + } + ], + ), + ] + ), [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('sub1Field', [ - { - 'id': 3, - 'testA': 4, - 'sub2Field': [ - { - 'testB': 5 - } - ] - } - ]) - ]), - { - 'ROOT_ID':1, - 'id': 6 - } + OrderedDict( + [ + ("ROOT_ID", 1), + ("id", 2), + ("sub1Field", [{"id": 3, "testA": 4, "sub2Field": [{"testB": 5}]}]), + ] + ), + {"ROOT_ID": 1, "id": 6}, ], [], - True + True, ), ( - 'Nested id', - { - 'custom_main': [ - { - 'ROOT_ID': 1, - 'id': 2, - } + "Nested id", + { + "custom_main": [{"ROOT_ID": 1, "id": 2,}], + "subField": [ + {"ROOT_ID": 1, "id": 2, "subField/0/id": 3, "subField/0/testA/id": 4,} ], - 'subField': [ - { - 'ROOT_ID': 1, - 'id': 2, - 'subField/0/id': 3, - 'subField/0/testA/id': 4, - } - ] }, - [{'ROOT_ID': 1, 'id': 2, 'subField': [{'id': 3, 'testA': {'id': 4}}]}], + [{"ROOT_ID": 1, "id": 2, "subField": [{"id": 3, "testA": {"id": 4}}]}], [], - True + True, ), ( - 'Missing columns', + "Missing columns", { - 'custom_main': [ - { - 'ROOT_ID': 1, - 'id': 2, - } + "custom_main": [{"ROOT_ID": 1, "id": 2,}], + "sub": [ + {"ROOT_ID": 1, "id": "", "subField/0/id": 3, "subField/0/testA": 4,}, + {"ROOT_ID": 1, "id": 2, "subField/0/id": 3, "subField/0/testA": 5,}, ], - 'sub': [ - { - 'ROOT_ID': 1, - 'id': '', - 'subField/0/id': 3, - 'subField/0/testA': 4, - }, - { - 'ROOT_ID': 1, - 'id': 2, - 'subField/0/id': 3, - 'subField/0/testA': 5, - } - ] }, [ - {'ROOT_ID': 1, 'id': 2, 'subField': [{'id': 3, 'testA': 5}]}, - {'ROOT_ID': 1, 'subField': [{'id': 3, 'testA': 4}]}, + {"ROOT_ID": 1, "id": 2, "subField": [{"id": 3, "testA": 5}]}, + {"ROOT_ID": 1, "subField": [{"id": 3, "testA": 4}]}, ], [], - False + False, ), ( - 'Unmatched id', - OrderedDict([ - ('custom_main', [ - { - 'ROOT_ID': 1, - 'id': 2, - } - ]), - ('sub', [ - { - 'ROOT_ID': 1, - 'id': 100, - 'subField/0/id': 3, - 'subField/0/testA': 4, - }, - { - 'ROOT_ID': 1, - 'id': 2, - 'subField/0/id': 3, - 'subField/0/testA': 5, - } - ]) - ]), + "Unmatched id", + OrderedDict( + [ + ("custom_main", [{"ROOT_ID": 1, "id": 2,}]), + ( + "sub", + [ + { + "ROOT_ID": 1, + "id": 100, + "subField/0/id": 3, + "subField/0/testA": 4, + }, + { + "ROOT_ID": 1, + "id": 2, + "subField/0/id": 3, + "subField/0/testA": 5, + }, + ], + ), + ] + ), [ - {'ROOT_ID': 1, 'id': 2, 'subField': [{'id': 3, 'testA': 5}]}, - {'ROOT_ID': 1, 'id': 100, 'subField': [{'id': 3, 'testA': 4}]}, + {"ROOT_ID": 1, "id": 2, "subField": [{"id": 3, "testA": 5}]}, + {"ROOT_ID": 1, "id": 100, "subField": [{"id": 3, "testA": 4}]}, ], [], - False + False, ), ( - 'Test same rollup', + "Test same rollup", { - 'main': [ + "main": [ { - 'ROOT_ID': 1, - 'id': 2, - 'testC': 3, - 'testArr/0/id': '4', - 'testArr/0/testB': '5', + "ROOT_ID": 1, + "id": 2, + "testC": 3, + "testArr/0/id": "4", + "testArr/0/testB": "5", }, - { - 'ROOT_ID': 6, - 'id': 7, - 'testC': 8, - 'testArr/0/testB': '9', - } + {"ROOT_ID": 6, "id": 7, "testC": 8, "testArr/0/testB": "9",}, + ], + "testArr": [ + {"ROOT_ID": 1, "id": 2, "testArr/0/id": "4", "testArr/0/testB": "5",}, + {"ROOT_ID": 6, "id": 7, "testArr/0/testB": "9",}, ], - 'testArr': [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testArr/0/id': '4', - 'testArr/0/testB': '5', - }, - { - 'ROOT_ID': 6, - 'id': 7, - 'testArr/0/testB': '9', - } - ] }, [ - {'ROOT_ID': 1, 'id': 2, 'testC':3, 'testArr': [{'id': '4', 'testB': '5'}]}, - {'ROOT_ID': 6, 'id': 7, 'testC':8, 'testArr': [ - {'testB': '9'}, {'testB': '9'} - # We have duplicates here because there's no ID to merge these - # on. 
This is different to the old behaviour. Issue filed at - # https://github.com/OpenDataServices/flatten-tool/issues/99 - ]}, + {"ROOT_ID": 1, "id": 2, "testC": 3, "testArr": [{"id": "4", "testB": "5"}]}, + { + "ROOT_ID": 6, + "id": 7, + "testC": 8, + "testArr": [ + {"testB": "9"}, + {"testB": "9"} + # We have duplicates here because there's no ID to merge these + # on. This is different to the old behaviour. Issue filed at + # https://github.com/OpenDataServices/flatten-tool/issues/99 + ], + }, ], [], - False + False, ), ( - 'Test conflicting rollup', - OrderedDict([ - ('main', [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testArr/0/id': '3', - 'testArr/0/testB': '4' - } - ]), - ('testArr', [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testArr/0/id': '3', - 'testArr/0/testB': '5', - } - ]) - ]), + "Test conflicting rollup", + OrderedDict( + [ + ( + "main", + [ + { + "ROOT_ID": 1, + "id": 2, + "testArr/0/id": "3", + "testArr/0/testB": "4", + } + ], + ), + ( + "testArr", + [ + { + "ROOT_ID": 1, + "id": 2, + "testArr/0/id": "3", + "testArr/0/testB": "5", + } + ], + ), + ] + ), [ { - 'ROOT_ID': 1, - 'id': 2, - 'testArr': [{ - 'id': '3', - 'testB': '4' - # (Since sheets are parsed in the order they appear, and the first value is used). - }] + "ROOT_ID": 1, + "id": 2, + "testArr": [ + { + "id": "3", + "testB": "4" + # (Since sheets are parsed in the order they appear, and the first value is used). + } + ], } ], - ['Conflict when merging field "testB" for ROOT_ID "1", id "2" in sheet testA: "4" != "5"'], - False + [ + 'Conflict when merging field "testB" for ROOT_ID "1", id "2" in sheet testA: "4" != "5"' + ], + False, ), ( - 'Unflatten empty', + "Unflatten empty", { - 'custom_main': [], - 'subsheet': [ + "custom_main": [], + "subsheet": [ { - 'ROOT_ID': '', - 'id': '', - 'testA': '', - 'testB': '', - 'testC': '', - 'testD': '', + "ROOT_ID": "", + "id": "", + "testA": "", + "testB": "", + "testC": "", + "testD": "", } - ] + ], }, [], [], - False - ) + False, + ), ] - testdata_multiplesheets_pointer = [ ( - 'with schema', + "with schema", { - 'custom_main': [ + "custom_main": [{"ROOT_ID": 1, "id": "2", "testA": 3}], + "sub": [ { - 'ROOT_ID': 1, - 'id': '2', - 'testA': 3 + "ROOT_ID": 1, + "id": 2, + "testArr/testB": 4, # test that we can infer this is an array from schema } ], - 'sub': [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testArr/testB': 4 # test that we can infer this an array from schema - } - ] }, - [{ - 'ROOT_ID': 1, - 'id': 2, # check that we join correctly when this gets converted to an - # integer because of the schema type - 'testA': 3, - 'testArr': [{ - 'testB': '4' - }] - }], - [] + [ + { + "ROOT_ID": 1, + "id": 2, # check that we join correctly when this gets converted to an + # integer because of the schema type + "testA": 3, + "testArr": [{"testB": "4"}], + } + ], + [], ) ] testdata_multiplesheets_titles = [ ( - 'Basic sub sheet', + "Basic sub sheet", { - 'custom_main': [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - }, - { - 'ROOT_ID': 1, - 'Identifier': 3, - } + "custom_main": [ + {"ROOT_ID": 1, "Identifier": 2,}, + {"ROOT_ID": 1, "Identifier": 3,}, + ], + "testArr": [ + {"ROOT_ID": 1, "Identifier": 2, "Arr title:C title": "3",}, + {"ROOT_ID": 1, "Identifier": 2, "Arr title:C title": "4",}, ], - 'testArr': [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:C title': '3', - }, - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:C title': '4', - } - ] }, [ - { - 'ROOT_ID': 1, - 'id': 2, - 'testArr': [ - {'testC': '3'}, - {'testC': '4'}, - ] - }, - { - 'ROOT_ID': 1, - 'id': 3 - } + {"ROOT_ID": 1, "id": 2, 
"testArr": [{"testC": "3"}, {"testC": "4"},]}, + {"ROOT_ID": 1, "id": 3}, ], [], - True + True, ), ( - 'Nested sub sheet (with id)', + "Nested sub sheet (with id)", { - 'custom_main': [ + "custom_main": [ { - 'ROOT_ID': 1, - 'Identifier': 2, - 'B title:Identifier': 3, - 'B title:C title': 4, + "ROOT_ID": 1, + "Identifier": 2, + "B title:Identifier": 3, + "B title:C title": 4, } ], - 'tes_subField': [ + "tes_subField": [ # It used to be neccesary to supply testA/id in this # situation, but now it's optional { - 'ROOT_ID': 1, - 'Identifier': 2, - 'B title:Identifier': 3, - 'B title:Sub title:E title': 5, + "ROOT_ID": 1, + "Identifier": 2, + "B title:Identifier": 3, + "B title:Sub title:E title": 5, } - ] + ], }, [ - {'ROOT_ID': 1, 'id': 2, 'testB': { - 'id': 3, - 'testC': 4, - 'subField': [{'testE': 5}] - }} + { + "ROOT_ID": 1, + "id": 2, + "testB": {"id": 3, "testC": 4, "subField": [{"testE": 5}]}, + } ], [], - True + True, ), ( - 'Nested sub sheet (without id)', + "Nested sub sheet (without id)", { - 'custom_main': [ + "custom_main": [ { - 'ROOT_ID': 1, - 'Identifier': 2, - 'B title:Identifier': 3, - 'B title:C title': 4, + "ROOT_ID": 1, + "Identifier": 2, + "B title:Identifier": 3, + "B title:C title": 4, } ], - 'sub': [ + "sub": [ # It used to be neccesary to supply testA/id in this # situation, but now it's optional - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'B title:Sub title:E title': 5, - } - ] + {"ROOT_ID": 1, "Identifier": 2, "B title:Sub title:E title": 5,} + ], }, [ - {'ROOT_ID': 1, 'id': 2, 'testB': { - 'id': 3, - 'testC': 4, - 'subField': [{'testE': 5}] - }} + { + "ROOT_ID": 1, + "id": 2, + "testB": {"id": 3, "testC": 4, "subField": [{"testE": 5}]}, + } ], [], - False + False, ), ( - 'Basic two sub sheets', - OrderedDict([ - ('custom_main', [ - OrderedDict([ - ('ROOT_ID', 1), - ('Identifier', 2), - ]), - OrderedDict([ - ('ROOT_ID', 1), - ('Identifier', 6), - ]) - ]), - ('testArr', [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:Identifier': '3', - 'Arr title:B title': '4', - } - ]), - ('tes_testNest', [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:Identifier': '3', - 'Arr title:Nest title:D title': '5', - } - ]) - ]), + "Basic two sub sheets", + OrderedDict( + [ + ( + "custom_main", + [ + OrderedDict([("ROOT_ID", 1), ("Identifier", 2),]), + OrderedDict([("ROOT_ID", 1), ("Identifier", 6),]), + ], + ), + ( + "testArr", + [ + { + "ROOT_ID": 1, + "Identifier": 2, + "Arr title:Identifier": "3", + "Arr title:B title": "4", + } + ], + ), + ( + "tes_testNest", + [ + { + "ROOT_ID": 1, + "Identifier": 2, + "Arr title:Identifier": "3", + "Arr title:Nest title:D title": "5", + } + ], + ), + ] + ), [ - OrderedDict([ - ('ROOT_ID', 1), - ('id', 2), - ('testArr', [ - { - 'id': '3', - 'testB': '4', - 'testNest': [ - { - 'testD': '5' - } - ] - } - ]) - ]), - { - 'ROOT_ID':1, - 'id': 6 - } + OrderedDict( + [ + ("ROOT_ID", 1), + ("id", 2), + ( + "testArr", + [{"id": "3", "testB": "4", "testNest": [{"testD": "5"}]}], + ), + ] + ), + {"ROOT_ID": 1, "id": 6}, ], [], - True + True, ), - ( - 'Nested id', - { - 'custom_main': [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - } - ], - 'testArr': [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:Identifier': '3', - 'Arr title:NestObj title:Identifier': '4', - } - ] - }, - [{'ROOT_ID': 1, 'id': 2, 'testArr': [{'id': '3', 'testNestObj': {'id': '4'}}]}], - [], - True - ), ( - 'Missing columns', + "Nested id", { - 'custom_main': [ + "custom_main": [{"ROOT_ID": 1, "Identifier": 2,}], + "testArr": [ { - 'ROOT_ID': 1, - 'Identifier': 2, + "ROOT_ID": 1, + 
"Identifier": 2, + "Arr title:Identifier": "3", + "Arr title:NestObj title:Identifier": "4", } ], - 'sub': [ + }, + [{"ROOT_ID": 1, "id": 2, "testArr": [{"id": "3", "testNestObj": {"id": "4"}}]}], + [], + True, + ), + ( + "Missing columns", + { + "custom_main": [{"ROOT_ID": 1, "Identifier": 2,}], + "sub": [ { - 'ROOT_ID': 1, - 'Identifier': '', - 'Arr title:Identifier': 3, - 'Arr title:B title': 4, + "ROOT_ID": 1, + "Identifier": "", + "Arr title:Identifier": 3, + "Arr title:B title": 4, }, { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:Identifier': 3, - 'Arr title:B title': 5, - } - ] + "ROOT_ID": 1, + "Identifier": 2, + "Arr title:Identifier": 3, + "Arr title:B title": 5, + }, + ], }, [ - {'ROOT_ID': 1, 'id': 2, 'testArr': [{'id': '3', 'testB': '5'}]}, - {'ROOT_ID': 1, 'testArr': [{'id': '3', 'testB': '4'}]}, + {"ROOT_ID": 1, "id": 2, "testArr": [{"id": "3", "testB": "5"}]}, + {"ROOT_ID": 1, "testArr": [{"id": "3", "testB": "4"}]}, ], [], - False + False, ), ( - 'Unmatched id', - OrderedDict([ - ('custom_main', [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - } - ]), - ('sub', [ - { - 'ROOT_ID': 1, - 'Identifier': 100, - 'Arr title:Identifier': 3, - 'Arr title:B title': 4, - }, - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:Identifier': 3, - 'Arr title:B title': 5, - } - ]) - ]), + "Unmatched id", + OrderedDict( + [ + ("custom_main", [{"ROOT_ID": 1, "Identifier": 2,}]), + ( + "sub", + [ + { + "ROOT_ID": 1, + "Identifier": 100, + "Arr title:Identifier": 3, + "Arr title:B title": 4, + }, + { + "ROOT_ID": 1, + "Identifier": 2, + "Arr title:Identifier": 3, + "Arr title:B title": 5, + }, + ], + ), + ] + ), [ - {'ROOT_ID': 1, 'id': 2, 'testArr': [{'id': '3', 'testB': '5'}]}, - {'ROOT_ID': 1, 'id': 100, 'testArr': [{'id': '3', 'testB': '4'}]}, + {"ROOT_ID": 1, "id": 2, "testArr": [{"id": "3", "testB": "5"}]}, + {"ROOT_ID": 1, "id": 100, "testArr": [{"id": "3", "testB": "4"}]}, ], [], - False + False, ), ( - 'Test same rollup', + "Test same rollup", { - 'main': [ + "main": [ { - 'ROOT_ID': 1, - 'Identifier': 2, - 'A title': 3, - 'Arr title:Identifier': 4, - 'Arr title:B title': 5, + "ROOT_ID": 1, + "Identifier": 2, + "A title": 3, + "Arr title:Identifier": 4, + "Arr title:B title": 5, }, - { - 'ROOT_ID': 6, - 'Identifier': 7, - 'A title': 8, - 'Arr title:B title': 9, - } + {"ROOT_ID": 6, "Identifier": 7, "A title": 8, "Arr title:B title": 9,}, ], - 'testArr': [ + "testArr": [ { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:Identifier': 4, - 'Arr title:B title': 5, + "ROOT_ID": 1, + "Identifier": 2, + "Arr title:Identifier": 4, + "Arr title:B title": 5, }, - { - 'ROOT_ID': 6, - 'Identifier': 7, - 'Arr title:B title': 9, - } - ] + {"ROOT_ID": 6, "Identifier": 7, "Arr title:B title": 9,}, + ], }, [ - {'ROOT_ID': 1, 'id': 2, 'testA':3, 'testArr': [{'id': '4', 'testB': '5'}]}, - {'ROOT_ID': 6, 'id': 7, 'testA':8, 'testArr': [ - {'testB': '9'}, {'testB': '9'} - # We have duplicates here because there's no ID to merge these - # on. This is different to the old behaviour. Issue filed at - # https://github.com/OpenDataServices/flatten-tool/issues/99 - ]}, + {"ROOT_ID": 1, "id": 2, "testA": 3, "testArr": [{"id": "4", "testB": "5"}]}, + { + "ROOT_ID": 6, + "id": 7, + "testA": 8, + "testArr": [ + {"testB": "9"}, + {"testB": "9"} + # We have duplicates here because there's no ID to merge these + # on. This is different to the old behaviour. 
Issue filed at + # https://github.com/OpenDataServices/flatten-tool/issues/99 + ], + }, ], [], - False + False, ), ( - 'Test conflicting rollup', - OrderedDict([ - ('main', [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:Identifier': '3', - 'Arr title:B title': '4' - } - ]), - ('testArr', [ - { - 'ROOT_ID': 1, - 'Identifier': 2, - 'Arr title:Identifier': '3', - 'Arr title:B title': '5', - } - ]) - ]), + "Test conflicting rollup", + OrderedDict( + [ + ( + "main", + [ + { + "ROOT_ID": 1, + "Identifier": 2, + "Arr title:Identifier": "3", + "Arr title:B title": "4", + } + ], + ), + ( + "testArr", + [ + { + "ROOT_ID": 1, + "Identifier": 2, + "Arr title:Identifier": "3", + "Arr title:B title": "5", + } + ], + ), + ] + ), [ { - 'ROOT_ID': 1, - 'id': 2, - 'testArr': [{ - 'id': '3', - 'testB': '4' - # (Since sheets are parsed in the order they appear, and the first value is used). - }] + "ROOT_ID": 1, + "id": 2, + "testArr": [ + { + "id": "3", + "testB": "4" + # (Since sheets are parsed in the order they appear, and the first value is used). + } + ], } ], - ['Conflict when merging field "testB" for ROOT_ID "1", id "2" in sheet testA: "4" != "5"'], - False + [ + 'Conflict when merging field "testB" for ROOT_ID "1", id "2" in sheet testA: "4" != "5"' + ], + False, ), ( - 'Unflatten empty', + "Unflatten empty", { - 'custom_main': [], - 'subsheet': [ - { - 'ROOT_ID': '', - 'Identifier': '', - 'A title': '', - 'U title': '', - } - ] + "custom_main": [], + "subsheet": [ + {"ROOT_ID": "", "Identifier": "", "A title": "", "U title": "",} + ], }, [], [], - False - ) + False, + ), ] -@pytest.mark.parametrize('convert_titles', [True, False]) -@pytest.mark.parametrize('use_schema', [True, False]) -@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS) -@pytest.mark.parametrize('comment,input_dict,expected_output_list,warning_messages,reversible', testdata_multiplesheets) -def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_dict, expected_output_list, recwarn, comment, warning_messages, reversible): - extra_kwargs = {'convert_titles': convert_titles} +@pytest.mark.parametrize("convert_titles", [True, False]) +@pytest.mark.parametrize("use_schema", [True, False]) +@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS) +@pytest.mark.parametrize( + "comment,input_dict,expected_output_list,warning_messages,reversible", + testdata_multiplesheets, +) +def test_unflatten( + convert_titles, + use_schema, + root_id, + root_id_kwargs, + input_dict, + expected_output_list, + recwarn, + comment, + warning_messages, + reversible, +): + extra_kwargs = {"convert_titles": convert_titles} extra_kwargs.update(root_id_kwargs) spreadsheet_input = ListInput( - sheets=OrderedDict([(sheet_name, [inject_root_id(root_id, line) for line in lines]) for sheet_name, lines in input_dict.items()]), + sheets=OrderedDict( + [ + (sheet_name, [inject_root_id(root_id, line) for line in lines]) + for sheet_name, lines in input_dict.items() + ] + ), **extra_kwargs - ) + ) spreadsheet_input.read_sheets() parser = SchemaParser( root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}}, root_id=root_id, - rollup=True + rollup=True, ) parser.parse() spreadsheet_input.parser = parser expected_output_list = [ - inject_root_id(root_id, expected_output_dict) for expected_output_dict in expected_output_list + inject_root_id(root_id, expected_output_dict) + for expected_output_dict in expected_output_list ] assert list(spreadsheet_input.unflatten()) == expected_output_list 
-@pytest.mark.parametrize('convert_titles', [True, False]) -@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS) -@pytest.mark.parametrize('comment,input_dict,expected_output_list,warning_messages', testdata_multiplesheets_pointer) -def test_unflatten_pointer(convert_titles, root_id, root_id_kwargs, input_dict, expected_output_list, recwarn, comment, warning_messages): - return test_unflatten(convert_titles=convert_titles, use_schema=True, root_id=root_id, root_id_kwargs=root_id_kwargs, input_dict=input_dict, expected_output_list=expected_output_list, recwarn=recwarn, comment=comment, warning_messages=warning_messages, reversible=False) +@pytest.mark.parametrize("convert_titles", [True, False]) +@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS) +@pytest.mark.parametrize( + "comment,input_dict,expected_output_list,warning_messages", + testdata_multiplesheets_pointer, +) +def test_unflatten_pointer( + convert_titles, + root_id, + root_id_kwargs, + input_dict, + expected_output_list, + recwarn, + comment, + warning_messages, +): + return test_unflatten( + convert_titles=convert_titles, + use_schema=True, + root_id=root_id, + root_id_kwargs=root_id_kwargs, + input_dict=input_dict, + expected_output_list=expected_output_list, + recwarn=recwarn, + comment=comment, + warning_messages=warning_messages, + reversible=False, + ) -@pytest.mark.parametrize('comment,input_dict,expected_output_list,warning_messages,reversible', testdata_multiplesheets_titles) -@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS) -def test_unflatten_titles(root_id, root_id_kwargs, input_dict, expected_output_list, recwarn, comment, warning_messages, reversible): +@pytest.mark.parametrize( + "comment,input_dict,expected_output_list,warning_messages,reversible", + testdata_multiplesheets_titles, +) +@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS) +def test_unflatten_titles( + root_id, + root_id_kwargs, + input_dict, + expected_output_list, + recwarn, + comment, + warning_messages, + reversible, +): """ Essentially the same as test unflatten, except that convert_titles and use_schema are always true, as both of these are needed to convert titles properly. (and runs with different test data). 
""" - if root_id != '': + if root_id != "": # Skip all tests with a root ID for now, as this is broken # https://github.com/OpenDataServices/flatten-tool/issues/84 pytest.skip() - return test_unflatten(convert_titles=True, use_schema=True, root_id=root_id, root_id_kwargs=root_id_kwargs, input_dict=input_dict, expected_output_list=expected_output_list, recwarn=recwarn, comment=comment, warning_messages=warning_messages, reversible=reversible) - + return test_unflatten( + convert_titles=True, + use_schema=True, + root_id=root_id, + root_id_kwargs=root_id_kwargs, + input_dict=input_dict, + expected_output_list=expected_output_list, + recwarn=recwarn, + comment=comment, + warning_messages=warning_messages, + reversible=reversible, + ) diff --git a/flattentool/tests/test_json_input.py b/flattentool/tests/test_json_input.py index c3a5b70d..5178937f 100644 --- a/flattentool/tests/test_json_input.py +++ b/flattentool/tests/test_json_input.py @@ -1,19 +1,26 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals + import os -from flattentool.json_input import JSONParser, BadlyFormedJSONError, BadlyFormedJSONErrorUTF8 +from collections import OrderedDict + +import pytest + +from flattentool.json_input import ( + BadlyFormedJSONError, + BadlyFormedJSONErrorUTF8, + JSONParser, +) from flattentool.schema import SchemaParser from flattentool.tests.test_schema_parser import object_in_array_example_properties -import pytest -from collections import OrderedDict def listify(d): - return {k:list(v) for k,v in d.items()} + return {k: list(v) for k, v in d.items()} def test_jsonparser_bad_json(tmpdir): - test_json = tmpdir.join('test.json') + test_json = tmpdir.join("test.json") test_json.write('{"a":"b",}') with pytest.raises(BadlyFormedJSONError): JSONParser(json_filename=test_json.strpath) @@ -23,7 +30,9 @@ def test_jsonparser_bad_json(tmpdir): def test_jsonparser_bad_json_utf8(): - name = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'fixtures', 'bad-utf8.json') + name = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "fixtures", "bad-utf8.json" + ) # matches against the special error type with pytest.raises(BadlyFormedJSONErrorUTF8): JSONParser(json_filename=name) @@ -40,8 +49,8 @@ def test_jsonparser_arguments_exceptions(tmpdir): Test that JSONParser throws a ValueError if it recievs too many or too few arguments. 
""" - test_json = tmpdir.join('test.json') - test_json.write('{}') + test_json = tmpdir.join("test.json") + test_json.write("{}") with pytest.raises(ValueError): JSONParser() with pytest.raises(ValueError): @@ -49,24 +58,24 @@ def test_jsonparser_arguments_exceptions(tmpdir): def test_json_filename(tmpdir): - test_json = tmpdir.join('test.json') + test_json = tmpdir.join("test.json") test_json.write('{"a":"b"}') parser = JSONParser(json_filename=test_json.strpath) - assert parser.root_json_dict == {'a':'b'} + assert parser.root_json_dict == {"a": "b"} def test_json_filename_utf8(tmpdir): - test_json = tmpdir.join('test.json') - test_json.write_text('{"a":"éαГ😼𝒞人"}', encoding='utf-8') + test_json = tmpdir.join("test.json") + test_json.write_text('{"a":"éαГ😼𝒞人"}', encoding="utf-8") parser = JSONParser(json_filename=test_json.strpath) - assert parser.root_json_dict == {'a':'éαГ😼𝒞人'} + assert parser.root_json_dict == {"a": "éαГ😼𝒞人"} def test_json_filename_ordered(tmpdir): - test_json = tmpdir.join('test.json') + test_json = tmpdir.join("test.json") test_json.write('{"a":"b", "c": "d"}') parser = JSONParser(json_filename=test_json.strpath) - assert list(parser.root_json_dict.items()) == [('a','b'), ('c','d')] + assert list(parser.root_json_dict.items()) == [("a", "b"), ("c", "d")] def test_parse_empty_json_dict(): @@ -78,209 +87,210 @@ def test_parse_empty_json_dict(): def test_parse_basic_json_dict(): - parser = JSONParser(root_json_dict=[ - OrderedDict([ - ('a', 'b'), - ('c', 'd'), - ]), - OrderedDict([ - ('a', 'e'), - ('c', 'f'), - ]), - ]) + parser = JSONParser( + root_json_dict=[ + OrderedDict([("a", "b"), ("c", "d"),]), + OrderedDict([("a", "e"), ("c", "f"),]), + ] + ) parser.parse() - assert list(parser.main_sheet) == [ 'a', 'c' ] + assert list(parser.main_sheet) == ["a", "c"] assert parser.main_sheet.lines == [ - {'a': 'b', 'c': 'd'}, - {'a': 'e', 'c': 'f'}, + {"a": "b", "c": "d"}, + {"a": "e", "c": "f"}, ] assert parser.sub_sheets == {} def test_parse_nested_dict_json_dict(): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('a', 'b'), - ('c', OrderedDict([('d', 'e')])), - ])]) + parser = JSONParser( + root_json_dict=[OrderedDict([("a", "b"), ("c", OrderedDict([("d", "e")])),])] + ) parser.parse() - assert list(parser.main_sheet) == [ 'a', 'c/d' ] - assert parser.main_sheet.lines == [ - {'a': 'b', 'c/d': 'e'} - ] + assert list(parser.main_sheet) == ["a", "c/d"] + assert parser.main_sheet.lines == [{"a": "b", "c/d": "e"}] assert parser.sub_sheets == {} def test_parse_nested_list_json_dict(): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('a', 'b'), - ('c', [OrderedDict([('d', 'e')])]), - ])]) + parser = JSONParser( + root_json_dict=[OrderedDict([("a", "b"), ("c", [OrderedDict([("d", "e")])]),])] + ) parser.parse() - assert list(parser.main_sheet) == [ 'a' ] - assert parser.main_sheet.lines == [ - {'a': 'b'} - ] - listify(parser.sub_sheets) == {'c': ['d']} - parser.sub_sheets['c'].lines == [{'d':'e'}] + assert list(parser.main_sheet) == ["a"] + assert parser.main_sheet.lines == [{"a": "b"}] + listify(parser.sub_sheets) == {"c": ["d"]} + parser.sub_sheets["c"].lines == [{"d": "e"}] def test_parse_array(): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('testarray', ['item','anotheritem', 42]) - ])]) + parser = JSONParser( + root_json_dict=[OrderedDict([("testarray", ["item", "anotheritem", 42])])] + ) parser.parse() - assert list(parser.main_sheet) == [ 'testarray' ] - assert parser.main_sheet.lines == [ - { - 'testarray': 'item;anotheritem;42' - } - ] + assert 
list(parser.main_sheet) == ["testarray"] + assert parser.main_sheet.lines == [{"testarray": "item;anotheritem;42"}] assert parser.sub_sheets == {} def test_root_list_path(): parser = JSONParser( - root_json_dict={'custom_key': [OrderedDict([ - ('a', 'b'), - ('c', 'd'), - ])]}, - root_list_path='custom_key') + root_json_dict={"custom_key": [OrderedDict([("a", "b"), ("c", "d"),])]}, + root_list_path="custom_key", + ) parser.parse() - assert list(parser.main_sheet) == [ 'a', 'c' ] - assert parser.main_sheet.lines == [ - {'a': 'b', 'c': 'd'} - ] + assert list(parser.main_sheet) == ["a", "c"] + assert parser.main_sheet.lines == [{"a": "b", "c": "d"}] assert parser.sub_sheets == {} class TestParseIDs(object): def test_parse_ids(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('ocid', 1), - ('id', 2), - ('a', 'b'), - ('c', [OrderedDict([('id', 3), ('d', 'e')]), OrderedDict([('id', 3), ('d', 'e2')])]), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ])], root_id='ocid') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("ocid", 1), + ("id", 2), + ("a", "b"), + ( + "c", + [ + OrderedDict([("id", 3), ("d", "e")]), + OrderedDict([("id", 3), ("d", "e2")]), + ], + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + root_id="ocid", + ) parser.parse() - assert list(parser.main_sheet) == [ 'ocid', 'id', 'a', 'f/g' ] - assert parser.main_sheet.lines == [ - { - 'ocid': 1, - 'id': 2, - 'a': 'b', - 'f/g': 'h' - } - ] - listify(parser.sub_sheets) == {'c': ['ocid','id','c/0/id','c/0/d']} - assert parser.sub_sheets['c'].lines == [ - { - 'ocid': 1, - 'id': 2, - 'c/0/id': 3, - 'c/0/d':'e' - }, - { - 'ocid': 1, - 'id': 2, - 'c/0/id': 3, - 'c/0/d':'e2' - }, + assert list(parser.main_sheet) == ["ocid", "id", "a", "f/g"] + assert parser.main_sheet.lines == [{"ocid": 1, "id": 2, "a": "b", "f/g": "h"}] + listify(parser.sub_sheets) == {"c": ["ocid", "id", "c/0/id", "c/0/d"]} + assert parser.sub_sheets["c"].lines == [ + {"ocid": 1, "id": 2, "c/0/id": 3, "c/0/d": "e"}, + {"ocid": 1, "id": 2, "c/0/id": 3, "c/0/d": "e2"}, ] def test_parse_ids_subsheet(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('ocid', 1), - ('id', 2), - ('testnest', [ - OrderedDict([ - ('id', 3), - ('a', 'b'), - ('c', [OrderedDict([('d', 'e')]), OrderedDict([('d', 'e2')])]), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ]) - ]) - ])], root_id='ocid') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("ocid", 1), + ("id", 2), + ( + "testnest", + [ + OrderedDict( + [ + ("id", 3), + ("a", "b"), + ( + "c", + [ + OrderedDict([("d", "e")]), + OrderedDict([("d", "e2")]), + ], + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + ), + ] + ) + ], + root_id="ocid", + ) parser.parse() - assert list(parser.main_sheet) == [ 'ocid', 'id' ] - assert parser.main_sheet.lines == [ - { - 'ocid': 1, - 'id': 2, - } - ] + assert list(parser.main_sheet) == ["ocid", "id"] + assert parser.main_sheet.lines == [{"ocid": 1, "id": 2,}] assert listify(parser.sub_sheets) == { - 'testnest': ['ocid', 'id', 'testnest/0/id', 'testnest/0/a', 'testnest/0/f/g'], - 'tes_c': ['ocid', 'id', 'testnest/0/id', 'testnest/0/c/0/d'] - } - assert parser.sub_sheets['testnest'].lines == [ - { - 'ocid': 1, - 'id': 2, - 'testnest/0/id': 3, - 'testnest/0/a': 'b', - 'testnest/0/f/g': 'h', - }, - ] - assert parser.sub_sheets['tes_c'].lines == [ - { - 'ocid': 1, - 'id': 2, - 
'testnest/0/id': 3, - 'testnest/0/c/0/d':'e' - }, + "testnest": [ + "ocid", + "id", + "testnest/0/id", + "testnest/0/a", + "testnest/0/f/g", + ], + "tes_c": ["ocid", "id", "testnest/0/id", "testnest/0/c/0/d"], + } + assert parser.sub_sheets["testnest"].lines == [ { - 'ocid': 1, - 'id': 2, - 'testnest/0/id': 3, - 'testnest/0/c/0/d':'e2' + "ocid": 1, + "id": 2, + "testnest/0/id": 3, + "testnest/0/a": "b", + "testnest/0/f/g": "h", }, ] + assert parser.sub_sheets["tes_c"].lines == [ + {"ocid": 1, "id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e"}, + {"ocid": 1, "id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e2"}, + ] def test_parse_ids_nested(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('ocid', 1), - ('id', 2), - ('a', 'b'), - ('testnest', OrderedDict([ - ('id', 3), - ('c', [OrderedDict([('d', 'e')]), OrderedDict([('d', 'e2')])]) - ])), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ])], root_id='ocid') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("ocid", 1), + ("id", 2), + ("a", "b"), + ( + "testnest", + OrderedDict( + [ + ("id", 3), + ( + "c", + [ + OrderedDict([("d", "e")]), + OrderedDict([("d", "e2")]), + ], + ), + ] + ), + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + root_id="ocid", + ) parser.parse() - assert list(parser.main_sheet) == [ 'ocid', 'id', 'a', 'testnest/id', 'f/g' ] + assert list(parser.main_sheet) == ["ocid", "id", "a", "testnest/id", "f/g"] assert parser.main_sheet.lines == [ - { - 'ocid': 1, - 'id': 2, - 'a': 'b', - 'testnest/id': 3, - 'f/g': 'h' - } + {"ocid": 1, "id": 2, "a": "b", "testnest/id": 3, "f/g": "h"} ] - assert listify(parser.sub_sheets) == {'tes_c': ['ocid','id','testnest/id','testnest/c/0/d']} - assert parser.sub_sheets['tes_c'].lines == [ - { - 'ocid': 1, - 'id': 2, - 'testnest/id': 3, - 'testnest/c/0/d':'e' - }, - { - 'ocid': 1, - 'id': 2, - 'testnest/id': 3, - 'testnest/c/0/d':'e2' - }, + assert listify(parser.sub_sheets) == { + "tes_c": ["ocid", "id", "testnest/id", "testnest/c/0/d"] + } + assert parser.sub_sheets["tes_c"].lines == [ + {"ocid": 1, "id": 2, "testnest/id": 3, "testnest/c/0/d": "e"}, + {"ocid": 1, "id": 2, "testnest/id": 3, "testnest/c/0/d": "e2"}, ] class TestParseUsingSchema(object): - @pytest.mark.parametrize('remove_empty_schema_columns', [False, True]) + @pytest.mark.parametrize("remove_empty_schema_columns", [False, True]) def test_sub_sheets(self, tmpdir, remove_empty_schema_columns): - test_schema = tmpdir.join('test.json') - test_schema.write('''{ + test_schema = tmpdir.join("test.json") + test_schema.write( + """{ "properties": { "c": { "type": "array", @@ -303,411 +313,477 @@ def test_sub_sheets(self, tmpdir, remove_empty_schema_columns): "f": { "type": "string" } } } - }''') + }""" + ) schema_parser = SchemaParser( - schema_filename=test_schema.strpath, - root_id='ocid' + schema_filename=test_schema.strpath, root_id="ocid" ) schema_parser.parse() parser = JSONParser( - root_json_dict=[OrderedDict([ - ('a', 'b'), - ('c', [OrderedDict([('d', 'e')])]), - ])], + root_json_dict=[ + OrderedDict([("a", "b"), ("c", [OrderedDict([("d", "e")])]),]) + ], schema_parser=schema_parser, remove_empty_schema_columns=remove_empty_schema_columns, ) parser.parse() - assert list(parser.main_sheet) == [ 'a' ] - assert parser.main_sheet.lines == [ - {'a': 'b'} - ] + assert list(parser.main_sheet) == ["a"] + assert parser.main_sheet.lines == [{"a": "b"}] assert len(parser.sub_sheets) == 2 if not 
remove_empty_schema_columns else 1 if not remove_empty_schema_columns: - assert list(parser.sub_sheets['c']) == list(['ocid', 'c/0/d', 'c/0/f']) - assert list(parser.sub_sheets['g']) == list(['ocid', 'g/0/h']) + assert list(parser.sub_sheets["c"]) == list(["ocid", "c/0/d", "c/0/f"]) + assert list(parser.sub_sheets["g"]) == list(["ocid", "g/0/h"]) else: - assert list(parser.sub_sheets['c']) == list(['ocid', 'c/0/d']) - assert parser.sub_sheets['c'].lines == [{'c/0/d':'e'}] + assert list(parser.sub_sheets["c"]) == list(["ocid", "c/0/d"]) + assert parser.sub_sheets["c"].lines == [{"c/0/d": "e"}] - def test_column_matching(self, tmpdir): - test_schema = tmpdir.join('test.json') - test_schema.write('''{ + def test_column_matching(self, tmpdir): + test_schema = tmpdir.join("test.json") + test_schema.write( + """{ "properties": { "c": { "type": "array", "items": {"type": "string"} } } - }''') - schema_parser = SchemaParser( - schema_filename=test_schema.strpath + }""" ) + schema_parser = SchemaParser(schema_filename=test_schema.strpath) schema_parser.parse() parser = JSONParser( - root_json_dict=[OrderedDict([ - ('c', ['d']), - ])], - schema_parser=schema_parser + root_json_dict=[OrderedDict([("c", ["d"]),])], schema_parser=schema_parser ) parser.parse() - assert list(parser.main_sheet) == [ 'c' ] - assert parser.main_sheet.lines == [ - {'c': 'd'} - ] + assert list(parser.main_sheet) == ["c"] + assert parser.main_sheet.lines == [{"c": "d"}] assert len(parser.sub_sheets) == 0 def test_rollup(self): - schema_parser = SchemaParser(root_schema_dict={ - 'properties': { - 'testA': { - 'type': 'array', - 'rollUp': [ 'testB' ], - 'items': { - 'type': 'object', - 'properties': { - 'testB': {'type': 'string'}, - 'testC': {'type': 'string'} - } - } - }, - } - }, rollup=True, root_id='ocid') + schema_parser = SchemaParser( + root_schema_dict={ + "properties": { + "testA": { + "type": "array", + "rollUp": ["testB"], + "items": { + "type": "object", + "properties": { + "testB": {"type": "string"}, + "testC": {"type": "string"}, + }, + }, + }, + } + }, + rollup=True, + root_id="ocid", + ) schema_parser.parse() parser = JSONParser( - root_json_dict=[OrderedDict([ - ('testA', [OrderedDict([('testB', '1'), ('testC', '2')])]), - ])], + root_json_dict=[ + OrderedDict( + [("testA", [OrderedDict([("testB", "1"), ("testC", "2")])]),] + ) + ], schema_parser=schema_parser, - root_id='ocid', - rollup=True + root_id="ocid", + rollup=True, ) parser.parse() - assert list(parser.main_sheet) == [ 'testA/0/testB' ] - assert parser.main_sheet.lines == [ - {'testA/0/testB': '1'} - ] + assert list(parser.main_sheet) == ["testA/0/testB"] + assert parser.main_sheet.lines == [{"testA/0/testB": "1"}] assert len(parser.sub_sheets) == 1 - assert set(parser.sub_sheets['testA']) == set(['ocid', 'testA/0/testB', 'testA/0/testC']) - assert parser.sub_sheets['testA'].lines == [{'testA/0/testB':'1', 'testA/0/testC': '2'}] + assert set(parser.sub_sheets["testA"]) == set( + ["ocid", "testA/0/testB", "testA/0/testC"] + ) + assert parser.sub_sheets["testA"].lines == [ + {"testA/0/testB": "1", "testA/0/testC": "2"} + ] def test_rollup_multiple_values(self, recwarn): - schema_parser = SchemaParser(root_schema_dict={ - 'properties': { - 'testA': { - 'type': 'array', - 'rollUp': [ 'testB' ], - 'items': { - 'type': 'object', - 'properties': { - 'testB': {'type': 'string'}, - 'testC': {'type': 'string'} - } - } - }, - } - }, rollup=True) + schema_parser = SchemaParser( + root_schema_dict={ + "properties": { + "testA": { + "type": "array", + "rollUp": 
["testB"], + "items": { + "type": "object", + "properties": { + "testB": {"type": "string"}, + "testC": {"type": "string"}, + }, + }, + }, + } + }, + rollup=True, + ) schema_parser.parse() parser = JSONParser( - root_json_dict=[OrderedDict([ - ('testA', [ - OrderedDict([('testB', '1'), ('testC', '2')]), - OrderedDict([('testB', '3'), ('testC', '4')]) - ]), - ])], + root_json_dict=[ + OrderedDict( + [ + ( + "testA", + [ + OrderedDict([("testB", "1"), ("testC", "2")]), + OrderedDict([("testB", "3"), ("testC", "4")]), + ], + ), + ] + ) + ], schema_parser=schema_parser, - rollup=True + rollup=True, ) parser.parse() - assert list(parser.main_sheet) == [ 'testA/0/testB' ] + assert list(parser.main_sheet) == ["testA/0/testB"] assert parser.main_sheet.lines == [ { - 'testA/0/testB': 'WARNING: More than one value supplied, consult the relevant sub-sheet for the data.' + "testA/0/testB": "WARNING: More than one value supplied, consult the relevant sub-sheet for the data." } ] assert len(parser.sub_sheets) == 1 - assert set(parser.sub_sheets['testA']) == set(['testA/0/testB', 'testA/0/testC']) - assert parser.sub_sheets['testA'].lines == [ - {'testA/0/testB':'1', 'testA/0/testC': '2'}, - {'testA/0/testB':'3', 'testA/0/testC': '4'} - ] + assert set(parser.sub_sheets["testA"]) == set( + ["testA/0/testB", "testA/0/testC"] + ) + assert parser.sub_sheets["testA"].lines == [ + {"testA/0/testB": "1", "testA/0/testC": "2"}, + {"testA/0/testB": "3", "testA/0/testC": "4"}, + ] w = recwarn.pop(UserWarning) - assert 'Could not provide rollup' in str(w.message) + assert "Could not provide rollup" in str(w.message) def test_two_parents(self): # This is a copy of test_two_parents from test_schema_parser.py, in # order to check that flattening and template generation use the same # sheet names - schema_parser = SchemaParser(root_schema_dict={ - 'properties': OrderedDict([ - ('Atest', { - 'type': 'array', - 'items': {'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Ctest')} - }), - ('Dtest', { - 'type': 'array', - 'items': {'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Etest')} - }) - ]) - }) + schema_parser = SchemaParser( + root_schema_dict={ + "properties": OrderedDict( + [ + ( + "Atest", + { + "type": "array", + "items": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + }, + }, + ), + ( + "Dtest", + { + "type": "array", + "items": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Etest" + ), + }, + }, + ), + ] + ) + } + ) schema_parser.parse() parser = JSONParser( - root_json_dict=[{ - 'Atest': [{ - 'id': 1, - 'Btest': [{ - 'Ctest': 2 - }] - }], - 'Dtest': [{ - 'id': 3, - 'Btest': [{ - 'Etest': 4 - }] - }] - }], - schema_parser=schema_parser + root_json_dict=[ + { + "Atest": [{"id": 1, "Btest": [{"Ctest": 2}]}], + "Dtest": [{"id": 3, "Btest": [{"Etest": 4}]}], + } + ], + schema_parser=schema_parser, ) parser.parse() assert set(parser.main_sheet) == set() - assert set(parser.sub_sheets) == set(['Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest']) - assert list(parser.sub_sheets['Atest']) == ['Atest/0/id'] - assert list(parser.sub_sheets['Dtest']) == ['Dtest/0/id'] - assert list(parser.sub_sheets['Ate_Btest']) == ['Atest/0/id', 'Atest/0/Btest/0/Ctest'] - assert list(parser.sub_sheets['Dte_Btest']) == ['Dtest/0/id', 'Dtest/0/Btest/0/Etest'] + assert set(parser.sub_sheets) == set( + ["Atest", "Dtest", "Ate_Btest", "Dte_Btest"] + ) + assert list(parser.sub_sheets["Atest"]) == 
["Atest/0/id"] + assert list(parser.sub_sheets["Dtest"]) == ["Dtest/0/id"] + assert list(parser.sub_sheets["Ate_Btest"]) == [ + "Atest/0/id", + "Atest/0/Btest/0/Ctest", + ] + assert list(parser.sub_sheets["Dte_Btest"]) == [ + "Dtest/0/id", + "Dtest/0/Btest/0/Etest", + ] + # TODO Check support for decimals, integers, booleans and Nones + class TestParseIDsCustomRootID(object): def test_parse_ids(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('custom', 1), - ('id', 2), - ('a', 'b'), - ('c', [OrderedDict([('id', 3), ('d', 'e')]), OrderedDict([('id', 3), ('d', 'e2')])]), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ])], root_id='custom') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("custom", 1), + ("id", 2), + ("a", "b"), + ( + "c", + [ + OrderedDict([("id", 3), ("d", "e")]), + OrderedDict([("id", 3), ("d", "e2")]), + ], + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + root_id="custom", + ) parser.parse() - assert list(parser.main_sheet) == [ 'custom', 'id', 'a', 'f/g' ] - assert parser.main_sheet.lines == [ - { - 'custom': 1, - 'id': 2, - 'a': 'b', - 'f/g': 'h' - } - ] - assert listify(parser.sub_sheets) == {'c': ['custom','id','c/0/id','c/0/d']} - assert parser.sub_sheets['c'].lines == [ - { - 'custom': 1, - 'id': 2, - 'c/0/id': 3, - 'c/0/d':'e' - }, - { - 'custom': 1, - 'id': 2, - 'c/0/id': 3, - 'c/0/d':'e2' - }, + assert list(parser.main_sheet) == ["custom", "id", "a", "f/g"] + assert parser.main_sheet.lines == [{"custom": 1, "id": 2, "a": "b", "f/g": "h"}] + assert listify(parser.sub_sheets) == {"c": ["custom", "id", "c/0/id", "c/0/d"]} + assert parser.sub_sheets["c"].lines == [ + {"custom": 1, "id": 2, "c/0/id": 3, "c/0/d": "e"}, + {"custom": 1, "id": 2, "c/0/id": 3, "c/0/d": "e2"}, ] def test_parse_ids_subsheet(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('custom', 1), - ('id', 2), - ('testnest', [ - OrderedDict([ - ('id', 3), - ('a', 'b'), - ('c', [OrderedDict([('d', 'e')]), OrderedDict([('d', 'e2')])]), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ]) - ]) - ])], root_id='custom') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("custom", 1), + ("id", 2), + ( + "testnest", + [ + OrderedDict( + [ + ("id", 3), + ("a", "b"), + ( + "c", + [ + OrderedDict([("d", "e")]), + OrderedDict([("d", "e2")]), + ], + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + ), + ] + ) + ], + root_id="custom", + ) parser.parse() - assert list(parser.main_sheet) == [ 'custom', 'id' ] - assert parser.main_sheet.lines == [ - { - 'custom': 1, - 'id': 2, - } - ] + assert list(parser.main_sheet) == ["custom", "id"] + assert parser.main_sheet.lines == [{"custom": 1, "id": 2,}] assert listify(parser.sub_sheets) == { - 'testnest': ['custom', 'id', 'testnest/0/id', 'testnest/0/a', 'testnest/0/f/g'], - 'tes_c': ['custom', 'id', 'testnest/0/id', 'testnest/0/c/0/d'] - } - assert parser.sub_sheets['testnest'].lines == [ + "testnest": [ + "custom", + "id", + "testnest/0/id", + "testnest/0/a", + "testnest/0/f/g", + ], + "tes_c": ["custom", "id", "testnest/0/id", "testnest/0/c/0/d"], + } + assert parser.sub_sheets["testnest"].lines == [ { - 'custom': 1, - 'id': 2, - 'testnest/0/id': 3, - 'testnest/0/a': 'b', - 'testnest/0/f/g': 'h', + "custom": 1, + "id": 2, + "testnest/0/id": 3, + "testnest/0/a": "b", + "testnest/0/f/g": "h", }, ] - assert parser.sub_sheets['tes_c'].lines == [ 
- { - 'custom': 1, - 'id': 2, - 'testnest/0/id': 3, - 'testnest/0/c/0/d':'e' - }, - { - 'custom': 1, - 'id': 2, - 'testnest/0/id': 3, - 'testnest/0/c/0/d':'e2' - }, + assert parser.sub_sheets["tes_c"].lines == [ + {"custom": 1, "id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e"}, + {"custom": 1, "id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e2"}, ] def test_parse_ids_nested(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('custom', 1), - ('id', 2), - ('a', 'b'), - ('testnest', OrderedDict([ - ('id', 3), - ('c', [OrderedDict([('d', 'e')]), OrderedDict([('d', 'e2')])]) - ])), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ])], root_id='custom') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("custom", 1), + ("id", 2), + ("a", "b"), + ( + "testnest", + OrderedDict( + [ + ("id", 3), + ( + "c", + [ + OrderedDict([("d", "e")]), + OrderedDict([("d", "e2")]), + ], + ), + ] + ), + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + root_id="custom", + ) parser.parse() - assert list(parser.main_sheet) == [ 'custom', 'id', 'a', 'testnest/id', 'f/g' ] + assert list(parser.main_sheet) == ["custom", "id", "a", "testnest/id", "f/g"] assert parser.main_sheet.lines == [ - { - 'custom': 1, - 'id': 2, - 'a': 'b', - 'testnest/id': 3, - 'f/g': 'h' - } + {"custom": 1, "id": 2, "a": "b", "testnest/id": 3, "f/g": "h"} ] - assert listify(parser.sub_sheets) == {'tes_c': ['custom','id','testnest/id','testnest/c/0/d']} - assert parser.sub_sheets['tes_c'].lines == [ - { - 'custom': 1, - 'id': 2, - 'testnest/id': 3, - 'testnest/c/0/d':'e' - }, - { - 'custom': 1, - 'id': 2, - 'testnest/id': 3, - 'testnest/c/0/d':'e2' - }, + assert listify(parser.sub_sheets) == { + "tes_c": ["custom", "id", "testnest/id", "testnest/c/0/d"] + } + assert parser.sub_sheets["tes_c"].lines == [ + {"custom": 1, "id": 2, "testnest/id": 3, "testnest/c/0/d": "e"}, + {"custom": 1, "id": 2, "testnest/id": 3, "testnest/c/0/d": "e2"}, ] class TestParseIDsNoRootID(object): def test_parse_ids(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('id', 2), - ('a', 'b'), - ('c', [OrderedDict([('id', 3), ('d', 'e')]), OrderedDict([('id', 3), ('d', 'e2')])]), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ])], root_id='') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("id", 2), + ("a", "b"), + ( + "c", + [ + OrderedDict([("id", 3), ("d", "e")]), + OrderedDict([("id", 3), ("d", "e2")]), + ], + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + root_id="", + ) parser.parse() - assert list(parser.main_sheet) == [ 'id', 'a', 'f/g' ] - assert parser.main_sheet.lines == [ - { - 'id': 2, - 'a': 'b', - 'f/g': 'h' - } - ] - assert listify(parser.sub_sheets) == {'c': ['id','c/0/id','c/0/d']} - assert parser.sub_sheets['c'].lines == [ - { - 'id': 2, - 'c/0/id': 3, - 'c/0/d':'e' - }, - { - 'id': 2, - 'c/0/id': 3, - 'c/0/d':'e2' - }, + assert list(parser.main_sheet) == ["id", "a", "f/g"] + assert parser.main_sheet.lines == [{"id": 2, "a": "b", "f/g": "h"}] + assert listify(parser.sub_sheets) == {"c": ["id", "c/0/id", "c/0/d"]} + assert parser.sub_sheets["c"].lines == [ + {"id": 2, "c/0/id": 3, "c/0/d": "e"}, + {"id": 2, "c/0/id": 3, "c/0/d": "e2"}, ] def test_parse_ids_subsheet(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('id', 2), - ('testnest', [ - OrderedDict([ - ('id', 3), - ('a', 'b'), - ('c', [OrderedDict([('d', 
'e')]), OrderedDict([('d', 'e2')])]), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ]) - ]) - ])], root_id='') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("id", 2), + ( + "testnest", + [ + OrderedDict( + [ + ("id", 3), + ("a", "b"), + ( + "c", + [ + OrderedDict([("d", "e")]), + OrderedDict([("d", "e2")]), + ], + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + ), + ] + ) + ], + root_id="", + ) parser.parse() - assert list(parser.main_sheet) == [ 'id' ] - assert parser.main_sheet.lines == [ - { - 'id': 2, - } - ] + assert list(parser.main_sheet) == ["id"] + assert parser.main_sheet.lines == [{"id": 2,}] assert listify(parser.sub_sheets) == { - 'testnest': ['id', 'testnest/0/id', 'testnest/0/a', 'testnest/0/f/g'], - 'tes_c': ['id', 'testnest/0/id', 'testnest/0/c/0/d'] - } - assert parser.sub_sheets['testnest'].lines == [ - { - 'id': 2, - 'testnest/0/id': 3, - 'testnest/0/a': 'b', - 'testnest/0/f/g': 'h', - }, + "testnest": ["id", "testnest/0/id", "testnest/0/a", "testnest/0/f/g"], + "tes_c": ["id", "testnest/0/id", "testnest/0/c/0/d"], + } + assert parser.sub_sheets["testnest"].lines == [ + {"id": 2, "testnest/0/id": 3, "testnest/0/a": "b", "testnest/0/f/g": "h",}, ] - assert parser.sub_sheets['tes_c'].lines == [ - { - 'id': 2, - 'testnest/0/id': 3, - 'testnest/0/c/0/d':'e' - }, - { - 'id': 2, - 'testnest/0/id': 3, - 'testnest/0/c/0/d':'e2' - }, + assert parser.sub_sheets["tes_c"].lines == [ + {"id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e"}, + {"id": 2, "testnest/0/id": 3, "testnest/0/c/0/d": "e2"}, ] def test_parse_ids_nested(self): - parser = JSONParser(root_json_dict=[OrderedDict([ - ('id', 2), - ('a', 'b'), - ('testnest', OrderedDict([ - ('id', 3), - ('c', [OrderedDict([('d', 'e')]), OrderedDict([('d', 'e2')])]) - ])), - ('f', {'g':'h'}) # Check that having nested objects doesn't break ID output - ])], root_id='') + parser = JSONParser( + root_json_dict=[ + OrderedDict( + [ + ("id", 2), + ("a", "b"), + ( + "testnest", + OrderedDict( + [ + ("id", 3), + ( + "c", + [ + OrderedDict([("d", "e")]), + OrderedDict([("d", "e2")]), + ], + ), + ] + ), + ), + ( + "f", + {"g": "h"}, + ), # Check that having nested objects doesn't break ID output + ] + ) + ], + root_id="", + ) parser.parse() - assert list(parser.main_sheet) == [ 'id', 'a', 'testnest/id', 'f/g' ] + assert list(parser.main_sheet) == ["id", "a", "testnest/id", "f/g"] assert parser.main_sheet.lines == [ - { - 'id': 2, - 'a': 'b', - 'testnest/id': 3, - 'f/g': 'h' - } + {"id": 2, "a": "b", "testnest/id": 3, "f/g": "h"} ] - assert listify(parser.sub_sheets) == {'tes_c': ['id','testnest/id','testnest/c/0/d']} - assert parser.sub_sheets['tes_c'].lines == [ - { - 'id': 2, - 'testnest/id': 3, - 'testnest/c/0/d':'e' - }, - { - 'id': 2, - 'testnest/id': 3, - 'testnest/c/0/d':'e2' - }, + assert listify(parser.sub_sheets) == { + "tes_c": ["id", "testnest/id", "testnest/c/0/d"] + } + assert parser.sub_sheets["tes_c"].lines == [ + {"id": 2, "testnest/id": 3, "testnest/c/0/d": "e"}, + {"id": 2, "testnest/id": 3, "testnest/c/0/d": "e2"}, ] diff --git a/flattentool/tests/test_json_input_is_unflatten_reversed.py b/flattentool/tests/test_json_input_is_unflatten_reversed.py index 81b45d09..257881b0 100644 --- a/flattentool/tests/test_json_input_is_unflatten_reversed.py +++ b/flattentool/tests/test_json_input_is_unflatten_reversed.py @@ -1,33 +1,66 @@ -''' +""" Test flattening (JSON input) by checking that we can run some of the unflatten 
testdata in reverse.
-'''
+"""
-from .test_input_SpreadsheetInput_unflatten import ROOT_ID_PARAMS, testdata, testdata_titles, create_schema, inject_root_id
-from .test_input_SpreadsheetInput_unflatten_mulitplesheets import testdata_multiplesheets, testdata_multiplesheets_titles
-from flattentool.json_input import JSONParser
-from flattentool.schema import SchemaParser
+import json
from collections import OrderedDict
+
import pytest
-import json
+
+from flattentool.json_input import JSONParser
+from flattentool.schema import SchemaParser
+
+from .test_input_SpreadsheetInput_unflatten import (
+ ROOT_ID_PARAMS,
+ create_schema,
+ inject_root_id,
+ testdata,
+ testdata_titles,
+)
+from .test_input_SpreadsheetInput_unflatten_mulitplesheets import (
+ testdata_multiplesheets,
+ testdata_multiplesheets_titles,
+)
+
# Don't test with use_titles and use_schema true because this will use
# titles, which the fixtures don't
-@pytest.mark.parametrize('use_titles,use_schema', [(False, False), (True, False), (False, True)])
-@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS)
-@pytest.mark.parametrize('comment,expected_output_list,input_list,warning_messages,reversible', [x for x in testdata if x[4]])
-def test_flatten(use_titles, use_schema, root_id, root_id_kwargs, input_list, expected_output_list, recwarn, comment, warning_messages, tmpdir, reversible):
+@pytest.mark.parametrize(
+ "use_titles,use_schema", [(False, False), (True, False), (False, True)]
+)
+@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS)
+@pytest.mark.parametrize(
+ "comment,expected_output_list,input_list,warning_messages,reversible",
+ [x for x in testdata if x[4]],
+)
+def test_flatten(
+ use_titles,
+ use_schema,
+ root_id,
+ root_id_kwargs,
+ input_list,
+ expected_output_list,
+ recwarn,
+ comment,
+ warning_messages,
+ tmpdir,
+ reversible,
+):
# Not sure why, but this seems to be necessary to have warnings picked up
# on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
import warnings
- warnings.simplefilter('always')
- extra_kwargs = {'use_titles': use_titles}
+ warnings.simplefilter("always")
+
+ extra_kwargs = {"use_titles": use_titles}
extra_kwargs.update(root_id_kwargs)
-
+
if use_schema:
schema_parser = SchemaParser(
- root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
+ root_schema_dict=create_schema(root_id)
+ if use_schema
+ else {"properties": {}},
rollup=True,
**extra_kwargs
)
@@ -35,20 +68,23 @@ def test_flatten(use_titles, use_schema, root_id, root_id_kwargs, input_list, ex
else:
schema_parser = None
- with tmpdir.join('input.json').open('w') as fp:
- json.dump({
- 'mykey': [inject_root_id(root_id, input_row) for input_row in input_list]
- }, fp)
+ with tmpdir.join("input.json").open("w") as fp:
+ json.dump(
+ {"mykey": [inject_root_id(root_id, input_row) for input_row in input_list]},
+ fp,
+ )
parser = JSONParser(
- json_filename=tmpdir.join('input.json').strpath,
- root_list_path='mykey',
+ json_filename=tmpdir.join("input.json").strpath,
+ root_list_path="mykey",
schema_parser=schema_parser,
- **extra_kwargs)
+ **extra_kwargs
+ )
parser.parse()
expected_output_list = [
- inject_root_id(root_id, expected_output_dict) for expected_output_dict in expected_output_list
+ inject_root_id(root_id, expected_output_dict)
+ for expected_output_dict in expected_output_list
]
if expected_output_list == [{}]:
# We don't expect an empty dictionary
@@ -56,39 +92,83 @@ def test_flatten(use_titles, use_schema, root_id, root_id_kwargs, input_list, ex
assert list(parser.main_sheet.lines) == expected_output_list
-@pytest.mark.parametrize('comment,expected_output_list,input_list,warning_messages,reversible', [x for x in testdata_titles if x[4]])
-@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS)
-def test_flatten_titles(root_id, root_id_kwargs, input_list, expected_output_list, recwarn, comment, warning_messages, reversible, tmpdir):
+@pytest.mark.parametrize(
+ "comment,expected_output_list,input_list,warning_messages,reversible",
+ [x for x in testdata_titles if x[4]],
+)
+@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS)
+def test_flatten_titles(
+ root_id,
+ root_id_kwargs,
+ input_list,
+ expected_output_list,
+ recwarn,
+ comment,
+ warning_messages,
+ reversible,
+ tmpdir,
+):
"""
Essentially the same as test unflatten, except that convert_titles and use_schema are always true, as both of these are needed to convert titles properly. (and runs with different test data).
"""
- if root_id != '':
+ if root_id != "":
# Skip all tests with a root ID for now, as this is broken
# https://github.com/OpenDataServices/flatten-tool/issues/84
pytest.skip()
- return test_flatten(use_titles=True, use_schema=True, root_id=root_id, root_id_kwargs=root_id_kwargs, input_list=input_list, expected_output_list=expected_output_list, recwarn=recwarn, comment=comment, warning_messages=warning_messages, reversible=reversible, tmpdir=tmpdir)
-
+ return test_flatten(
+ use_titles=True,
+ use_schema=True,
+ root_id=root_id,
+ root_id_kwargs=root_id_kwargs,
+ input_list=input_list,
+ expected_output_list=expected_output_list,
+ recwarn=recwarn,
+ comment=comment,
+ warning_messages=warning_messages,
+ reversible=reversible,
+ tmpdir=tmpdir,
+ )
# Don't test with use_titles and use_schema true because this will use
# titles, which the fixtures don't
-@pytest.mark.parametrize('use_titles,use_schema', [(False, False), (True, False), (False, True)])
-@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS)
-@pytest.mark.parametrize('comment,expected_output_dict,input_list,warning_messages,reversible', [x for x in testdata_multiplesheets if x[4]])
-def test_flatten_multiplesheets(use_titles, use_schema, root_id, root_id_kwargs, input_list, expected_output_dict, recwarn, comment, warning_messages, tmpdir, reversible):
+@pytest.mark.parametrize(
+ "use_titles,use_schema", [(False, False), (True, False), (False, True)]
+)
+@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS)
+@pytest.mark.parametrize(
+ "comment,expected_output_dict,input_list,warning_messages,reversible",
+ [x for x in testdata_multiplesheets if x[4]],
+)
+def test_flatten_multiplesheets(
+ use_titles,
+ use_schema,
+ root_id,
+ root_id_kwargs,
+ input_list,
+ expected_output_dict,
+ recwarn,
+ comment,
+ warning_messages,
+ tmpdir,
+ reversible,
+):
# Not sure why, but this seems to be necessary to have warnings picked up
# on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
import warnings
- warnings.simplefilter('always')
- extra_kwargs = {'use_titles': use_titles}
+ warnings.simplefilter("always")
+
+ extra_kwargs = {"use_titles": use_titles}
extra_kwargs.update(root_id_kwargs)
-
+
if use_schema:
schema_parser = SchemaParser(
- root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
+ root_schema_dict=create_schema(root_id)
+ if use_schema
+ else {"properties": {}},
rollup=True,
**extra_kwargs
)
@@ -96,35 +176,70 @@ def test_flatten_multiplesheets(use_titles, use_schema, root_id, root_id_kwargs,
else:
schema_parser = None - with tmpdir.join('input.json').open('w') as fp: - json.dump({ - 'mykey': [inject_root_id(root_id, input_row) for input_row in input_list] - }, fp) + with tmpdir.join("input.json").open("w") as fp: + json.dump( + {"mykey": [inject_root_id(root_id, input_row) for input_row in input_list]}, + fp, + ) parser = JSONParser( - json_filename=tmpdir.join('input.json').strpath, - root_list_path='mykey', + json_filename=tmpdir.join("input.json").strpath, + root_list_path="mykey", schema_parser=schema_parser, - **extra_kwargs) + **extra_kwargs + ) parser.parse() - expected_output_dict = OrderedDict([(sheet_name, [inject_root_id(root_id, line) for line in lines]) for sheet_name, lines in expected_output_dict.items()]) - output = {sheet_name:sheet.lines for sheet_name, sheet in parser.sub_sheets.items() if sheet.lines} - output['custom_main'] = parser.main_sheet.lines + expected_output_dict = OrderedDict( + [ + (sheet_name, [inject_root_id(root_id, line) for line in lines]) + for sheet_name, lines in expected_output_dict.items() + ] + ) + output = { + sheet_name: sheet.lines + for sheet_name, sheet in parser.sub_sheets.items() + if sheet.lines + } + output["custom_main"] = parser.main_sheet.lines assert output == expected_output_dict -@pytest.mark.parametrize('comment,expected_output_dict,input_list,warning_messages,reversible', [x for x in testdata_multiplesheets_titles if x[4]]) -@pytest.mark.parametrize('root_id,root_id_kwargs', ROOT_ID_PARAMS) -def test_flatten_multiplesheets_titles(root_id, root_id_kwargs, input_list, expected_output_dict, recwarn, comment, warning_messages, reversible, tmpdir): +@pytest.mark.parametrize( + "comment,expected_output_dict,input_list,warning_messages,reversible", + [x for x in testdata_multiplesheets_titles if x[4]], +) +@pytest.mark.parametrize("root_id,root_id_kwargs", ROOT_ID_PARAMS) +def test_flatten_multiplesheets_titles( + root_id, + root_id_kwargs, + input_list, + expected_output_dict, + recwarn, + comment, + warning_messages, + reversible, + tmpdir, +): """ Essentially the same as test unflatten, except that convert_titles and use_schema are always true, as both of these are needed to convert titles properly. (and runs with different test data). 
""" - if root_id != '': + if root_id != "": # Skip all tests with a root ID for now, as this is broken # https://github.com/OpenDataServices/flatten-tool/issues/84 pytest.skip() - return test_flatten_multiplesheets(use_titles=True, use_schema=True, root_id=root_id, root_id_kwargs=root_id_kwargs, input_list=input_list, expected_output_dict=expected_output_dict, recwarn=recwarn, comment=comment, warning_messages=warning_messages, reversible=reversible, tmpdir=tmpdir) - + return test_flatten_multiplesheets( + use_titles=True, + use_schema=True, + root_id=root_id, + root_id_kwargs=root_id_kwargs, + input_list=input_list, + expected_output_dict=expected_output_dict, + recwarn=recwarn, + comment=comment, + warning_messages=warning_messages, + reversible=reversible, + tmpdir=tmpdir, + ) diff --git a/flattentool/tests/test_output.py b/flattentool/tests/test_output.py index 605dc50c..023ce09b 100644 --- a/flattentool/tests/test_output.py +++ b/flattentool/tests/test_output.py @@ -1,18 +1,20 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import pytest + import os -from flattentool import output, schema -from flattentool.sheet import Sheet -from flattentool.ODSReader import ODSReader + import openpyxl +import pytest +from flattentool import output +from flattentool.ODSReader import ODSReader +from flattentool.sheet import Sheet class MockParser(object): def __init__(self, main_sheet, sub_sheets): self.main_sheet = Sheet(main_sheet) - self.sub_sheets = {k:Sheet(v) for k,v in sub_sheets.items()} + self.sub_sheets = {k: Sheet(v) for k, v in sub_sheets.items()} def test_spreadsheetouput_base_fails(): @@ -28,183 +30,213 @@ def test_blank_sheets(tmpdir): for format_name, spreadsheet_output_class in output.FORMATS.items(): spreadsheet_output = spreadsheet_output_class( parser=MockParser([], {}), - main_sheet_name='release', - output_name=os.path.join(tmpdir.strpath, 'release'+output.FORMATS_SUFFIX[format_name])) + main_sheet_name="release", + output_name=os.path.join( + tmpdir.strpath, "release" + output.FORMATS_SUFFIX[format_name] + ), + ) spreadsheet_output.write_sheets() # Check XLSX is empty - wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath) - assert wb.sheetnames == ['release'] - rows = list(wb['release'].rows) + wb = openpyxl.load_workbook(tmpdir.join("release.xlsx").strpath) + assert wb.sheetnames == ["release"] + rows = list(wb["release"].rows) assert len(rows) == 0 - + # Check CSV is Empty - assert tmpdir.join('release').listdir() == [ tmpdir.join('release').join('release.csv') ] - assert tmpdir.join('release', 'release.csv').read().strip('\r\n') == '' + assert tmpdir.join("release").listdir() == [ + tmpdir.join("release").join("release.csv") + ] + assert tmpdir.join("release", "release.csv").read().strip("\r\n") == "" # Check ODS is empty - odswb = ODSReader(tmpdir.join('release.ods').strpath) - ods_rows = odswb.getSheet('release') + odswb = ODSReader(tmpdir.join("release.ods").strpath) + ods_rows = odswb.getSheet("release") assert ods_rows == [[]] def test_populated_header(tmpdir): for format_name, spreadsheet_output_class in output.FORMATS.items(): - subsheet = Sheet(root_id='ocid') - subsheet.add_field('c') + subsheet = Sheet(root_id="ocid") + subsheet.add_field("c") spreadsheet_output = spreadsheet_output_class( - parser=MockParser(['a', 'd'], {'b': subsheet}), - main_sheet_name='release', - output_name=os.path.join(tmpdir.strpath, 'release'+output.FORMATS_SUFFIX[format_name])) + parser=MockParser(["a", "d"], {"b": subsheet}), + main_sheet_name="release", + 
output_name=os.path.join(
+                tmpdir.strpath, "release" + output.FORMATS_SUFFIX[format_name]
+            ),
+        )
         spreadsheet_output.write_sheets()

     # Check XLSX
-    wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
-    assert wb.sheetnames == ['release', 'b']
-    rows = list(wb['release'].rows)
+    wb = openpyxl.load_workbook(tmpdir.join("release.xlsx").strpath)
+    assert wb.sheetnames == ["release", "b"]
+    rows = list(wb["release"].rows)
     assert len(rows) == 1
-    assert [ x.value for x in rows[0] ] == [ 'a', 'd' ]
-    b_rows = list(wb['b'].rows)
+    assert [x.value for x in rows[0]] == ["a", "d"]
+    b_rows = list(wb["b"].rows)
     assert len(b_rows) == 1
-    assert [ x.value for x in b_rows[0] ] == [ 'ocid', 'c' ]
+    assert [x.value for x in b_rows[0]] == ["ocid", "c"]

     # Check CSV
-    assert set(tmpdir.join('release').listdir()) == set([
-        tmpdir.join('release').join('release.csv'),
-        tmpdir.join('release').join('b.csv')
-    ])
-    assert tmpdir.join('release', 'release.csv').read().strip('\r\n') == 'a,d'
-    assert tmpdir.join('release', 'b.csv').read().strip('\r\n') == 'ocid,c'
+    assert set(tmpdir.join("release").listdir()) == set(
+        [
+            tmpdir.join("release").join("release.csv"),
+            tmpdir.join("release").join("b.csv"),
+        ]
+    )
+    assert tmpdir.join("release", "release.csv").read().strip("\r\n") == "a,d"
+    assert tmpdir.join("release", "b.csv").read().strip("\r\n") == "ocid,c"

     # Check ODS
-    odswb = ODSReader(tmpdir.join('release.ods').strpath)
-    ods_rows = odswb.getSheet('release')
+    odswb = ODSReader(tmpdir.join("release.ods").strpath)
+    ods_rows = odswb.getSheet("release")
     assert len(ods_rows) == 1
-    assert [ x for x in ods_rows[0] ] == [ 'a', 'd' ]
-    ods_b_rows = odswb.getSheet('b')
+    assert [x for x in ods_rows[0]] == ["a", "d"]
+    ods_b_rows = odswb.getSheet("b")
     assert len(ods_b_rows) == 1
-    assert [ x for x in ods_b_rows[0] ] == [ 'ocid', 'c' ]
+    assert [x for x in ods_b_rows[0]] == ["ocid", "c"]


 def test_empty_lines(tmpdir):
-    subsheet = Sheet(root_id='ocid')
-    subsheet.add_field('c')
-    parser = MockParser(['a', 'd'], {'b': subsheet})
+    subsheet = Sheet(root_id="ocid")
+    subsheet.add_field("c")
+    parser = MockParser(["a", "d"], {"b": subsheet})
     parser.main_sheet.lines = []
     for format_name, spreadsheet_output_class in output.FORMATS.items():
         spreadsheet_output = spreadsheet_output_class(
             parser=parser,
-            main_sheet_name='release',
-            output_name=os.path.join(tmpdir.strpath, 'release'+output.FORMATS_SUFFIX[format_name]))
+            main_sheet_name="release",
+            output_name=os.path.join(
+                tmpdir.strpath, "release" + output.FORMATS_SUFFIX[format_name]
+            ),
+        )
         spreadsheet_output.write_sheets()

     # Check XLSX
-    wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
-    assert wb.sheetnames == ['release', 'b']
-    rows = list(wb['release'].rows)
+    wb = openpyxl.load_workbook(tmpdir.join("release.xlsx").strpath)
+    assert wb.sheetnames == ["release", "b"]
+    rows = list(wb["release"].rows)
     assert len(rows) == 1
-    assert [ x.value for x in rows[0] ] == [ 'a', 'd' ]
-    b_rows = list(wb['b'].rows)
+    assert [x.value for x in rows[0]] == ["a", "d"]
+    b_rows = list(wb["b"].rows)
     assert len(b_rows) == 1
-    assert [ x.value for x in b_rows[0] ] == [ 'ocid', 'c' ]
+    assert [x.value for x in b_rows[0]] == ["ocid", "c"]

     # Check CSV
-    assert set(tmpdir.join('release').listdir()) == set([
-        tmpdir.join('release').join('release.csv'),
-        tmpdir.join('release').join('b.csv')
-    ])
-    assert tmpdir.join('release', 'release.csv').read().strip('\r\n') == 'a,d'
-    assert tmpdir.join('release', 'b.csv').read().strip('\r\n') == 'ocid,c'
+    assert set(tmpdir.join("release").listdir()) == set(
+        [
+            tmpdir.join("release").join("release.csv"),
+            tmpdir.join("release").join("b.csv"),
+        ]
+    )
+    assert tmpdir.join("release", "release.csv").read().strip("\r\n") == "a,d"
+    assert tmpdir.join("release", "b.csv").read().strip("\r\n") == "ocid,c"

     # Check ODS
-    odswb = ODSReader(tmpdir.join('release.ods').strpath)
-    ods_rows = odswb.getSheet('release')
+    odswb = ODSReader(tmpdir.join("release.ods").strpath)
+    ods_rows = odswb.getSheet("release")
     assert len(ods_rows) == 1
-    assert [ x for x in ods_rows[0] ] == [ 'a', 'd' ]
-    ods_b_rows = odswb.getSheet('b')
+    assert [x for x in ods_rows[0]] == ["a", "d"]
+    ods_b_rows = odswb.getSheet("b")
     assert len(ods_b_rows) == 1
-    assert [ x for x in ods_b_rows[0] ] == [ 'ocid', 'c' ]
+    assert [x for x in ods_b_rows[0]] == ["ocid", "c"]


 def test_populated_lines(tmpdir):
-    subsheet = Sheet(root_id='ocid')
-    subsheet.add_field('c')
-    parser = MockParser(['a'], {})
-    parser.main_sheet.lines = [{'a': 'cell1'}, {'a': 'cell2'}]
-    subsheet.lines = [{'c': 'cell3'}, {'c': 'cell4'}]
-    parser.sub_sheets['b'] = subsheet
+    subsheet = Sheet(root_id="ocid")
+    subsheet.add_field("c")
+    parser = MockParser(["a"], {})
+    parser.main_sheet.lines = [{"a": "cell1"}, {"a": "cell2"}]
+    subsheet.lines = [{"c": "cell3"}, {"c": "cell4"}]
+    parser.sub_sheets["b"] = subsheet
     for format_name, spreadsheet_output_class in output.FORMATS.items():
         spreadsheet_output = spreadsheet_output_class(
             parser=parser,
-            main_sheet_name='release',
-            output_name=os.path.join(tmpdir.strpath, 'release'+output.FORMATS_SUFFIX[format_name]))
+            main_sheet_name="release",
+            output_name=os.path.join(
+                tmpdir.strpath, "release" + output.FORMATS_SUFFIX[format_name]
+            ),
+        )
         spreadsheet_output.write_sheets()

     # Check XLSX
-    wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
-    assert wb.sheetnames == ['release', 'b']
-    rows = list(wb['release'].rows)
+    wb = openpyxl.load_workbook(tmpdir.join("release.xlsx").strpath)
+    assert wb.sheetnames == ["release", "b"]
+    rows = list(wb["release"].rows)
     assert len(rows) == 3
-    assert [ x.value for x in rows[0] ] == [ 'a' ]
-    assert [ x.value for x in rows[1] ] == [ 'cell1' ]
-    assert [ x.value for x in rows[2] ] == [ 'cell2' ]
-    b_rows = list(wb['b'].rows)
+    assert [x.value for x in rows[0]] == ["a"]
+    assert [x.value for x in rows[1]] == ["cell1"]
+    assert [x.value for x in rows[2]] == ["cell2"]
+    b_rows = list(wb["b"].rows)
     assert len(b_rows) == 3
-    assert [ x.value for x in b_rows[0] ] == [ 'ocid', 'c' ]
-    assert [ x.value for x in b_rows[1] ] == [ None, 'cell3' ]
-    assert [ x.value for x in b_rows[2] ] == [ None, 'cell4' ]
+    assert [x.value for x in b_rows[0]] == ["ocid", "c"]
+    assert [x.value for x in b_rows[1]] == [None, "cell3"]
+    assert [x.value for x in b_rows[2]] == [None, "cell4"]

     # Check CSV
-    assert set(tmpdir.join('release').listdir()) == set([
-        tmpdir.join('release').join('release.csv'),
-        tmpdir.join('release').join('b.csv')
-    ])
-    assert tmpdir.join('release', 'release.csv').read().strip('\r\n').replace('\r', '') == 'a\ncell1\ncell2'
-    assert tmpdir.join('release', 'b.csv').read().strip('\r\n').replace('\r', '') == 'ocid,c\n,cell3\n,cell4'
+    assert set(tmpdir.join("release").listdir()) == set(
+        [
+            tmpdir.join("release").join("release.csv"),
+            tmpdir.join("release").join("b.csv"),
+        ]
+    )
+    assert (
+        tmpdir.join("release", "release.csv").read().strip("\r\n").replace("\r", "")
+        == "a\ncell1\ncell2"
+    )
+    assert (
+        tmpdir.join("release", "b.csv").read().strip("\r\n").replace("\r", "")
+        == "ocid,c\n,cell3\n,cell4"
+    )

     # Check ODS - currently broken test
-    odswb = ODSReader(tmpdir.join('release.ods').strpath)
-    ods_rows = odswb.getSheet('release')
+    odswb = ODSReader(tmpdir.join("release.ods").strpath)
+    ods_rows = odswb.getSheet("release")
     assert len(ods_rows) == 3
-    assert [ x for x in ods_rows[0] ] == [ 'a' ]
-    assert [ x for x in ods_rows[1] ] == [ 'cell1' ]
-    assert [ x for x in ods_rows[2] ] == [ 'cell2' ]
-    ods_b_rows = odswb.getSheet('b')
+    assert [x for x in ods_rows[0]] == ["a"]
+    assert [x for x in ods_rows[1]] == ["cell1"]
+    assert [x for x in ods_rows[2]] == ["cell2"]
+    ods_b_rows = odswb.getSheet("b")
     assert len(ods_b_rows) == 3
-    assert [ x for x in ods_b_rows[0] ] == [ 'ocid', 'c' ]
-    assert [ x for x in ods_b_rows[1] ] == [ None, 'cell3' ]
-    assert [ x for x in ods_b_rows[2] ] == [ None, 'cell4' ]
+    assert [x for x in ods_b_rows[0]] == ["ocid", "c"]
+    assert [x for x in ods_b_rows[1]] == [None, "cell3"]
+    assert [x for x in ods_b_rows[2]] == [None, "cell4"]
+

 def test_utf8(tmpdir):
-    parser = MockParser(['é'], {})
-    parser.main_sheet.lines = [{'é': 'éαГ😼𝒞人'}, {'é': 'cell2'}]
+    parser = MockParser(["é"], {})
+    parser.main_sheet.lines = [{"é": "éαГ😼𝒞人"}, {"é": "cell2"}]
     for format_name, spreadsheet_output_class in output.FORMATS.items():
         spreadsheet_output = spreadsheet_output_class(
             parser=parser,
-            main_sheet_name='release',
-            output_name=os.path.join(tmpdir.strpath, 'release'+output.FORMATS_SUFFIX[format_name]))
+            main_sheet_name="release",
+            output_name=os.path.join(
+                tmpdir.strpath, "release" + output.FORMATS_SUFFIX[format_name]
+            ),
+        )
         spreadsheet_output.write_sheets()

     # Check XLSX
-    wb = openpyxl.load_workbook(tmpdir.join('release.xlsx').strpath)
-    assert wb.sheetnames == ['release']
-    rows = list(wb['release'].rows)
+    wb = openpyxl.load_workbook(tmpdir.join("release.xlsx").strpath)
+    assert wb.sheetnames == ["release"]
+    rows = list(wb["release"].rows)
     assert len(rows) == 3
-    assert [ x.value for x in rows[0] ] == [ 'é' ]
-    assert [ x.value for x in rows[1] ] == [ 'éαГ😼𝒞人' ]
-    assert [ x.value for x in rows[2] ] == [ 'cell2' ]
+    assert [x.value for x in rows[0]] == ["é"]
+    assert [x.value for x in rows[1]] == ["éαГ😼𝒞人"]
+    assert [x.value for x in rows[2]] == ["cell2"]

     # Check CSV
-    assert set(tmpdir.join('release').listdir()) == set([
-        tmpdir.join('release').join('release.csv'),
-    ])
-    release_csv_text = tmpdir.join('release', 'release.csv').read_text(encoding='utf-8')
-    assert release_csv_text.strip('\r\n').replace('\r', '') == 'é\néαГ😼𝒞人\ncell2'
+    assert set(tmpdir.join("release").listdir()) == set(
+        [tmpdir.join("release").join("release.csv"),]
+    )
+    release_csv_text = tmpdir.join("release", "release.csv").read_text(encoding="utf-8")
+    assert release_csv_text.strip("\r\n").replace("\r", "") == "é\néαГ😼𝒞人\ncell2"

     # Check ODS
-    odswb = ODSReader(tmpdir.join('release.ods').strpath)
-    ods_rows = odswb.getSheet('release')
+    odswb = ODSReader(tmpdir.join("release.ods").strpath)
+    ods_rows = odswb.getSheet("release")
     assert len(ods_rows) == 3
-    assert [ x for x in ods_rows[0] ] == [ 'é' ]
-    assert [ x for x in ods_rows[1] ] == [ 'éαГ😼𝒞人' ]
-    assert [ x for x in ods_rows[2] ] == [ 'cell2' ]
+    assert [x for x in ods_rows[0]] == ["é"]
+    assert [x for x in ods_rows[1]] == ["éαГ😼𝒞人"]
+    assert [x for x in ods_rows[2]] == ["cell2"]
diff --git a/flattentool/tests/test_roundtrip.py b/flattentool/tests/test_roundtrip.py
index 51ad44e8..07b09e93 100644
--- a/flattentool/tests/test_roundtrip.py
+++ b/flattentool/tests/test_roundtrip.py
@@ -1,120 +1,133 @@
-from flattentool import unflatten, flatten
 import json
-import sys
 import os
-import xmltodict
+
 import pytest
+import xmltodict
+
+from flattentool import flatten, unflatten


-@pytest.mark.parametrize('output_format', ['xlsx', 'csv'])
+@pytest.mark.parametrize("output_format", ["xlsx", "csv"])
 def test_roundtrip(tmpdir, output_format):
-    input_name = 'flattentool/tests/fixtures/tenders_releases_2_releases.json'
-    base_name = 'flattentool/tests/fixtures/tenders_releases_base.json'
+    input_name = "flattentool/tests/fixtures/tenders_releases_2_releases.json"
+    base_name = "flattentool/tests/fixtures/tenders_releases_base.json"
     flatten(
         input_name=input_name,
-        output_name=tmpdir.join('flattened').strpath+'.'+output_format,
+        output_name=tmpdir.join("flattened").strpath + "." + output_format,
         output_format=output_format,
-        schema='flattentool/tests/fixtures/release-schema.json',
-        root_list_path='releases',
-        main_sheet_name='releases')
+        schema="flattentool/tests/fixtures/release-schema.json",
+        root_list_path="releases",
+        main_sheet_name="releases",
+    )
     unflatten(
-        input_name=tmpdir.join('flattened').strpath+'.'+output_format,
-        output_name=tmpdir.join('roundtrip.json').strpath,
+        input_name=tmpdir.join("flattened").strpath + "." + output_format,
+        output_name=tmpdir.join("roundtrip.json").strpath,
         input_format=output_format,
         base_json=base_name,
-        schema='flattentool/tests/fixtures/release-schema.json',
-        root_list_path='releases')
+        schema="flattentool/tests/fixtures/release-schema.json",
+        root_list_path="releases",
+    )
     original_json = json.load(open(input_name))
-    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))
+    roundtripped_json = json.load(tmpdir.join("roundtrip.json"))
     # Not currently possible to roundtrip Nones
     # https://github.com/open-contracting/flattening-ocds/issues/35
-    for release in roundtripped_json['releases']:
-        release['tender']['awardCriteriaDetails'] = None
+    for release in roundtripped_json["releases"]:
+        release["tender"]["awardCriteriaDetails"] = None
     assert original_json == roundtripped_json


-@pytest.mark.parametrize('use_titles', [False, True])
-@pytest.mark.parametrize('output_format', ['xlsx', 'csv'])
+@pytest.mark.parametrize("use_titles", [False, True])
+@pytest.mark.parametrize("output_format", ["xlsx", "csv"])
 def test_roundtrip_360(tmpdir, output_format, use_titles):
-    input_name = 'flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json'
+    input_name = (
+        "flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json"
+    )
     flatten(
         input_name=input_name,
-        output_name=tmpdir.join('flattened').strpath+'.'+output_format,
+        output_name=tmpdir.join("flattened").strpath + "." + output_format,
         output_format=output_format,
-        schema='flattentool/tests/fixtures/360-giving-schema.json',
-        root_list_path='grants',
-        root_id='',
+        schema="flattentool/tests/fixtures/360-giving-schema.json",
+        root_list_path="grants",
+        root_id="",
         use_titles=use_titles,
-        main_sheet_name='grants')
+        main_sheet_name="grants",
+    )
     unflatten(
-        input_name=tmpdir.join('flattened').strpath+'.'+output_format,
-        output_name=tmpdir.join('roundtrip.json').strpath,
+        input_name=tmpdir.join("flattened").strpath + "." + output_format,
+        output_name=tmpdir.join("roundtrip.json").strpath,
         input_format=output_format,
-        schema='flattentool/tests/fixtures/360-giving-schema.json',
-        root_list_path='grants',
-        root_id='',
-        convert_titles=use_titles)
+        schema="flattentool/tests/fixtures/360-giving-schema.json",
+        root_list_path="grants",
+        root_id="",
+        convert_titles=use_titles,
+    )
     original_json = json.load(open(input_name))
-    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))
+    roundtripped_json = json.load(tmpdir.join("roundtrip.json"))
     assert original_json == roundtripped_json


-@pytest.mark.parametrize('use_titles', [False, True])
+@pytest.mark.parametrize("use_titles", [False, True])
 def test_roundtrip_360_rollup(tmpdir, use_titles):
-    input_name = 'flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json'
-    output_format = 'csv'
-    output_name = tmpdir.join('flattened').strpath+'.'+output_format
-    moved_name = tmpdir.mkdir('flattened_main_only').strpath
+    input_name = (
+        "flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json"
+    )
+    output_format = "csv"
+    output_name = tmpdir.join("flattened").strpath + "." + output_format
+    moved_name = tmpdir.mkdir("flattened_main_only").strpath
     flatten(
         input_name=input_name,
         output_name=output_name,
         output_format=output_format,
-        schema='flattentool/tests/fixtures/360-giving-schema.json',
-        root_list_path='grants',
-        root_id='',
+        schema="flattentool/tests/fixtures/360-giving-schema.json",
+        root_list_path="grants",
+        root_id="",
         use_titles=use_titles,
         rollup=True,
-        main_sheet_name='grants')
+        main_sheet_name="grants",
+    )

-    os.rename(output_name+'/grants.csv', moved_name+'/grants.csv')
+    os.rename(output_name + "/grants.csv", moved_name + "/grants.csv")

     unflatten(
         input_name=moved_name,
-        output_name=tmpdir.join('roundtrip.json').strpath,
+        output_name=tmpdir.join("roundtrip.json").strpath,
         input_format=output_format,
-        schema='flattentool/tests/fixtures/360-giving-schema.json',
-        root_list_path='grants',
-        root_id='',
-        convert_titles=use_titles)
+        schema="flattentool/tests/fixtures/360-giving-schema.json",
+        root_list_path="grants",
+        root_id="",
+        convert_titles=use_titles,
+    )
     original_json = json.load(open(input_name))
-    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))
+    roundtripped_json = json.load(tmpdir.join("roundtrip.json"))
     assert original_json == roundtripped_json


-@pytest.mark.parametrize('output_format', ['xlsx', 'csv'])
+@pytest.mark.parametrize("output_format", ["xlsx", "csv"])
 def test_roundtrip_xml(tmpdir, output_format):
-    input_name = 'examples/iati/expected.xml'
+    input_name = "examples/iati/expected.xml"
     flatten(
         input_name=input_name,
-        output_name=tmpdir.join('flattened').strpath+'.'+output_format,
+        output_name=tmpdir.join("flattened").strpath + "." + output_format,
         output_format=output_format,
-        root_list_path='iati-activity',
-        id_name='iati-identifier',
-        xml=True)
+        root_list_path="iati-activity",
+        id_name="iati-identifier",
+        xml=True,
+    )
     unflatten(
-        input_name=tmpdir.join('flattened').strpath+'.'+output_format,
-        output_name=tmpdir.join('roundtrip.xml').strpath,
+        input_name=tmpdir.join("flattened").strpath + "." + output_format,
+        output_name=tmpdir.join("roundtrip.xml").strpath,
         input_format=output_format,
-        root_list_path='iati-activity',
-        id_name='iati-identifier',
-        xml=True)
-    original_xml = open(input_name, 'rb')
-    roundtripped_xml = tmpdir.join('roundtrip.xml').open('rb')
+        root_list_path="iati-activity",
+        id_name="iati-identifier",
+        xml=True,
+    )
+    original_xml = open(input_name, "rb")
+    roundtripped_xml = tmpdir.join("roundtrip.xml").open("rb")
     # Compare without ordering, by using dict_constructor=dict instead of
     # OrderedDict
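Editor's note, not part of the diff: the roundtrip tests reformatted above pair flatten() with unflatten(), mirroring the keyword arguments so the data survives the trip. A minimal sketch under the same call signature the hunks show; the file paths are placeholders:

    from flattentool import flatten, unflatten

    # JSON -> spreadsheet (paths are placeholders)
    flatten(
        input_name="releases.json",
        output_name="flattened.xlsx",
        output_format="xlsx",
        root_list_path="releases",
        main_sheet_name="releases",
    )
    # spreadsheet -> JSON, mirroring the arguments above
    unflatten(
        input_name="flattened.xlsx",
        output_name="roundtrip.json",
        input_format="xlsx",
        root_list_path="releases",
    )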
tmpdir.join("test_schema.json") tmpfile.write('{"a":{}, "c":{}, "b":{}, "d":{}}') parser = SchemaParser(schema_filename=tmpfile.strpath) - assert list(parser.root_schema_dict.keys()) == ['a', 'c', 'b', 'd'] + assert list(parser.root_schema_dict.keys()) == ["a", "c", "b", "d"] def test_main_sheet_basic(): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'Atest': type_string, - # type is allowed to be empty, and we should assume string - 'Btest': {}, + parser = SchemaParser( + root_schema_dict={ + "properties": { + "Atest": type_string, + # type is allowed to be empty, and we should assume string + "Btest": {}, + } } - }) + ) parser.parse() - assert set(parser.main_sheet) == set(['Atest', 'Btest']) + assert set(parser.main_sheet) == set(["Atest", "Btest"]) def test_main_sheet_nested(): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'Atest': { - 'type': 'object', - 'properties': {'Ctest': type_string} + parser = SchemaParser( + root_schema_dict={ + "properties": { + "Atest": {"type": "object", "properties": {"Ctest": type_string}} } } - }) + ) parser.parse() - assert set(parser.main_sheet) == set(['Atest/Ctest']) + assert set(parser.main_sheet) == set(["Atest/Ctest"]) def test_sub_sheet(): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'Atest': { - 'type': 'array', - 'items': { - 'type': 'object', - 'properties': {'Btest': type_string} - } - }, + parser = SchemaParser( + root_schema_dict={ + "properties": { + "Atest": { + "type": "array", + "items": {"type": "object", "properties": {"Btest": type_string}}, + }, + } } - }) + ) parser.parse() assert set(parser.main_sheet) == set([]) - assert set(parser.sub_sheets) == set(['Atest']) - assert list(parser.sub_sheets['Atest']) == ['Atest/0/Btest'] + assert set(parser.sub_sheets) == set(["Atest"]) + assert list(parser.sub_sheets["Atest"]) == ["Atest/0/Btest"] def object_in_array_example_properties(parent_name, child_name): return { - 'id': type_string, + "id": type_string, parent_name: { - 'type': 'array', - 'items': { - 'type': 'object', - 'properties': {child_name: type_string} - } - } + "type": "array", + "items": {"type": "object", "properties": {child_name: type_string}}, + }, } class TestSubSheetParentID(object): def test_parent_is_object(self): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'Atest': { - 'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Ctest') + parser = SchemaParser( + root_schema_dict={ + "properties": { + "Atest": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + } } } - }) + ) parser.parse() - assert set(parser.main_sheet) == set(['Atest/id']) - assert set(parser.sub_sheets) == set(['Ate_Btest']) - assert list(parser.sub_sheets['Ate_Btest']) == ['Atest/id', 'Atest/Btest/0/Ctest'] + assert set(parser.main_sheet) == set(["Atest/id"]) + assert set(parser.sub_sheets) == set(["Ate_Btest"]) + assert list(parser.sub_sheets["Ate_Btest"]) == [ + "Atest/id", + "Atest/Btest/0/Ctest", + ] def test_parent_is_array(self): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'Atest': { - 'type': 'array', - 'items': {'type': 'object', 'properties': object_in_array_example_properties('Btest', 'Ctest')} + parser = SchemaParser( + root_schema_dict={ + "properties": { + "Atest": { + "type": "array", + "items": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + }, + } } } - }) + ) parser.parse() assert set(parser.main_sheet) == set() - assert 
set(parser.sub_sheets) == set(['Atest', 'Ate_Btest']) - assert list(parser.sub_sheets['Atest']) == ['Atest/0/id'] - assert list(parser.sub_sheets['Ate_Btest']) == ['Atest/0/id', 'Atest/0/Btest/0/Ctest'] + assert set(parser.sub_sheets) == set(["Atest", "Ate_Btest"]) + assert list(parser.sub_sheets["Atest"]) == ["Atest/0/id"] + assert list(parser.sub_sheets["Ate_Btest"]) == [ + "Atest/0/id", + "Atest/0/Btest/0/Ctest", + ] def test_two_parents(self): - parser = SchemaParser(root_schema_dict={ - 'properties': OrderedDict([ - ('Atest', { - 'type': 'array', - 'items': {'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Ctest')} - }), - ('Dtest', { - 'type': 'array', - 'items': {'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Etest')} - }) - ]) - }) + parser = SchemaParser( + root_schema_dict={ + "properties": OrderedDict( + [ + ( + "Atest", + { + "type": "array", + "items": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + }, + }, + ), + ( + "Dtest", + { + "type": "array", + "items": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Etest" + ), + }, + }, + ), + ] + ) + } + ) parser.parse() assert set(parser.main_sheet) == set() - assert set(parser.sub_sheets) == set(['Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest']) - assert list(parser.sub_sheets['Atest']) == ['Atest/0/id'] - assert list(parser.sub_sheets['Dtest']) == ['Dtest/0/id'] - assert list(parser.sub_sheets['Ate_Btest']) == ['Atest/0/id', 'Atest/0/Btest/0/Ctest'] - assert list(parser.sub_sheets['Dte_Btest']) == ['Dtest/0/id', 'Dtest/0/Btest/0/Etest'] + assert set(parser.sub_sheets) == set( + ["Atest", "Dtest", "Ate_Btest", "Dte_Btest"] + ) + assert list(parser.sub_sheets["Atest"]) == ["Atest/0/id"] + assert list(parser.sub_sheets["Dtest"]) == ["Dtest/0/id"] + assert list(parser.sub_sheets["Ate_Btest"]) == [ + "Atest/0/id", + "Atest/0/Btest/0/Ctest", + ] + assert list(parser.sub_sheets["Dte_Btest"]) == [ + "Dtest/0/id", + "Dtest/0/Btest/0/Etest", + ] def test_parent_is_object_nested(self): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'Atest': { - 'type': 'object', - 'properties': { - 'Btest': { - 'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Ctest') - } + parser = SchemaParser( + root_schema_dict={ + "properties": { + "Atest": { + "type": "object", + "properties": { + "Btest": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + } + }, } } } - }) + ) parser.parse() - assert set(parser.main_sheet) == set(['Atest/Btest/id']) - assert set(parser.sub_sheets) == set(['Ate_Bte_Btest']) - assert list(parser.sub_sheets['Ate_Bte_Btest']) == ['Atest/Btest/id', 'Atest/Btest/Btest/0/Ctest'] + assert set(parser.main_sheet) == set(["Atest/Btest/id"]) + assert set(parser.sub_sheets) == set(["Ate_Bte_Btest"]) + assert list(parser.sub_sheets["Ate_Bte_Btest"]) == [ + "Atest/Btest/id", + "Atest/Btest/Btest/0/Ctest", + ] class TestSubSheetMainID(object): def test_parent_is_object(self): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'id': type_string, - 'Atest': { - 'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Ctest') + parser = SchemaParser( + root_schema_dict={ + "properties": { + "id": type_string, + "Atest": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + }, } } - }) + ) parser.parse() - assert set(parser.main_sheet) == 
set(['id', 'Atest/id']) - assert set(parser.sub_sheets) == set(['Ate_Btest']) - assert list(parser.sub_sheets['Ate_Btest']) == ['id', 'Atest/id', 'Atest/Btest/0/Ctest'] + assert set(parser.main_sheet) == set(["id", "Atest/id"]) + assert set(parser.sub_sheets) == set(["Ate_Btest"]) + assert list(parser.sub_sheets["Ate_Btest"]) == [ + "id", + "Atest/id", + "Atest/Btest/0/Ctest", + ] def test_parent_is_array(self): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'id': type_string, - 'Atest': { - 'type': 'array', - 'items': {'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Ctest')} + parser = SchemaParser( + root_schema_dict={ + "properties": { + "id": type_string, + "Atest": { + "type": "array", + "items": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + }, + }, } } - }) + ) parser.parse() - assert set(parser.main_sheet) == set(['id']) - assert set(parser.sub_sheets) == set(['Atest', 'Ate_Btest']) - assert list(parser.sub_sheets['Atest']) == ['id', 'Atest/0/id'] - assert list(parser.sub_sheets['Ate_Btest']) == ['id', 'Atest/0/id', 'Atest/0/Btest/0/Ctest'] + assert set(parser.main_sheet) == set(["id"]) + assert set(parser.sub_sheets) == set(["Atest", "Ate_Btest"]) + assert list(parser.sub_sheets["Atest"]) == ["id", "Atest/0/id"] + assert list(parser.sub_sheets["Ate_Btest"]) == [ + "id", + "Atest/0/id", + "Atest/0/Btest/0/Ctest", + ] def test_two_parents(self): - parser = SchemaParser(root_schema_dict={ - 'properties': OrderedDict([ - ('id', type_string), - ('Atest', { - 'type': 'array', - 'items': {'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Ctest')} - }), - ('Dtest', { - 'type': 'array', - 'items': {'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Etest')} - }) - ]) - }) + parser = SchemaParser( + root_schema_dict={ + "properties": OrderedDict( + [ + ("id", type_string), + ( + "Atest", + { + "type": "array", + "items": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + }, + }, + ), + ( + "Dtest", + { + "type": "array", + "items": { + "type": "object", + "properties": object_in_array_example_properties( + "Btest", "Etest" + ), + }, + }, + ), + ] + ) + } + ) parser.parse() - assert set(parser.main_sheet) == set(['id']) - assert set(parser.sub_sheets) == set(['Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest']) - assert list(parser.sub_sheets['Atest']) == ['id', 'Atest/0/id'] - assert list(parser.sub_sheets['Dtest']) == ['id', 'Dtest/0/id'] - assert list(parser.sub_sheets['Ate_Btest']) == ['id', 'Atest/0/id', 'Atest/0/Btest/0/Ctest'] - assert list(parser.sub_sheets['Dte_Btest']) == ['id', 'Dtest/0/id', 'Dtest/0/Btest/0/Etest'] + assert set(parser.main_sheet) == set(["id"]) + assert set(parser.sub_sheets) == set( + ["Atest", "Dtest", "Ate_Btest", "Dte_Btest"] + ) + assert list(parser.sub_sheets["Atest"]) == ["id", "Atest/0/id"] + assert list(parser.sub_sheets["Dtest"]) == ["id", "Dtest/0/id"] + assert list(parser.sub_sheets["Ate_Btest"]) == [ + "id", + "Atest/0/id", + "Atest/0/Btest/0/Ctest", + ] + assert list(parser.sub_sheets["Dte_Btest"]) == [ + "id", + "Dtest/0/id", + "Dtest/0/Btest/0/Etest", + ] def test_custom_main_sheet_name(self): parser = SchemaParser( root_schema_dict={ - 'properties': { - 'id': type_string, - 'Atest': { - 'type': 'object', - 'properties': object_in_array_example_properties('Btest', 'Ctest') - } + "properties": { + "id": type_string, + "Atest": { + "type": "object", + 
"properties": object_in_array_example_properties( + "Btest", "Ctest" + ), + }, } } ) parser.parse() - assert set(parser.main_sheet) == set(['id', 'Atest/id']) - assert set(parser.sub_sheets) == set(['Ate_Btest']) - assert list(parser.sub_sheets['Ate_Btest']) == [ - 'id', - 'Atest/id', - 'Atest/Btest/0/Ctest'] + assert set(parser.main_sheet) == set(["id", "Atest/id"]) + assert set(parser.sub_sheets) == set(["Ate_Btest"]) + assert list(parser.sub_sheets["Ate_Btest"]) == [ + "id", + "Atest/id", + "Atest/Btest/0/Ctest", + ] -@pytest.mark.parametrize('type_', ['string', 'number']) +@pytest.mark.parametrize("type_", ["string", "number"]) def test_simple_array(type_): parser = SchemaParser( root_schema_dict={ - 'properties': { - 'Atest': { - 'type': 'array', - 'items': { - 'type': type_ - } - } - } + "properties": {"Atest": {"type": "array", "items": {"type": type_}}} } ) parser.parse() - assert set(parser.main_sheet) == set(['Atest']) + assert set(parser.main_sheet) == set(["Atest"]) -@pytest.mark.parametrize('type_', ['string', 'number']) +@pytest.mark.parametrize("type_", ["string", "number"]) def test_nested_simple_array(type_): parser = SchemaParser( root_schema_dict={ - 'properties': { - 'Atest': { - 'type': 'array', - 'items': { - 'type': 'array', - 'items': { - 'type': type_ - } - } + "properties": { + "Atest": { + "type": "array", + "items": {"type": "array", "items": {"type": type_}}, } } } ) parser.parse() - assert set(parser.main_sheet) == set(['Atest']) + assert set(parser.main_sheet) == set(["Atest"]) def test_references_sheet_names(tmpdir): @@ -308,398 +398,384 @@ def test_references_sheet_names(tmpdir): but is NOT any more. """ - tmpfile = tmpdir.join('test_schema.json') - tmpfile.write('''{ + tmpfile = tmpdir.join("test_schema.json") + tmpfile.write( + """{ "properties": { "Atest": { "type": "array", "items": {"$ref": "#/Btest"} } }, "Btest": { "type": "object", "properties": {"Ctest":{"type": "string"}} } - }''') + }""" + ) parser = SchemaParser(schema_filename=tmpfile.strpath) parser.parse() - assert set(parser.sub_sheets) == set(['Atest']) # used to be Btest - assert list(parser.sub_sheets['Atest']) == ['Atest/0/Ctest'] + assert set(parser.sub_sheets) == set(["Atest"]) # used to be Btest + assert list(parser.sub_sheets["Atest"]) == ["Atest/0/Ctest"] def test_rollup(): - parser = SchemaParser(root_schema_dict={ - 'properties': { - 'Atest': { - 'type': 'array', - 'rollUp': [ 'Btest' ], - 'items': { - 'type': 'object', - 'properties': { - 'Btest': type_string, - 'Ctest': type_string - } - } - }, - } - }, rollup=True) + parser = SchemaParser( + root_schema_dict={ + "properties": { + "Atest": { + "type": "array", + "rollUp": ["Btest"], + "items": { + "type": "object", + "properties": {"Btest": type_string, "Ctest": type_string}, + }, + }, + } + }, + rollup=True, + ) parser.parse() - assert set(parser.main_sheet) == set(['Atest/0/Btest']) - assert set(parser.sub_sheets) == set(['Atest']) - assert set(parser.sub_sheets['Atest']) == set(['Atest/0/Btest', 'Atest/0/Ctest']) + assert set(parser.main_sheet) == set(["Atest/0/Btest"]) + assert set(parser.sub_sheets) == set(["Atest"]) + assert set(parser.sub_sheets["Atest"]) == set(["Atest/0/Btest", "Atest/0/Ctest"]) def test_bad_rollup(recwarn): - ''' + """ When rollUp is specified, but the field is missing in the schema, we expect a warning. 
-    '''
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'type': 'array',
-                'rollUp': [ 'Btest' ],
-                'items': {
-                    'type': 'object',
-                    'properties': {
-                        'Ctest': type_string
-                    }
-                }
-            },
-        }
-    }, rollup=True)
+    """
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "type": "array",
+                    "rollUp": ["Btest"],
+                    "items": {"type": "object", "properties": {"Ctest": type_string}},
+                },
+            }
+        },
+        rollup=True,
+    )
     parser.parse()
     w = recwarn.pop(UserWarning)
-    assert 'Btest in rollUp but not in schema' in str(w.message)
+    assert "Btest in rollUp but not in schema" in str(w.message)
     assert set(parser.main_sheet) == set()
-    assert set(parser.sub_sheets) == set(['Atest'])
-    assert set(parser.sub_sheets['Atest']) == set(['Atest/0/Ctest'])
+    assert set(parser.sub_sheets) == set(["Atest"])
+    assert set(parser.sub_sheets["Atest"]) == set(["Atest/0/Ctest"])


 def test_sub_sheet_custom_id():
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'type': 'array',
-                'items': {
-                    'type': 'object',
-                    'properties': {'Btest': type_string}
-                }
-            },
-        }
-    }, root_id='custom')
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "type": "array",
+                    "items": {"type": "object", "properties": {"Btest": type_string}},
+                },
+            }
+        },
+        root_id="custom",
+    )
     parser.parse()
     assert set(parser.main_sheet) == set([])
-    assert set(parser.sub_sheets) == set(['Atest'])
-    assert list(parser.sub_sheets['Atest']) == ['custom', 'Atest/0/Btest']
+    assert set(parser.sub_sheets) == set(["Atest"])
+    assert list(parser.sub_sheets["Atest"]) == ["custom", "Atest/0/Btest"]

+
 def test_sub_sheet_empty_string_root_id():
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'type': 'array',
-                'items': {
-                    'type': 'object',
-                    'properties': {'Btest': type_string}
-                }
-            },
-        }
-    }, root_id='')
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "type": "array",
+                    "items": {"type": "object", "properties": {"Btest": type_string}},
+                },
+            }
+        },
+        root_id="",
+    )
     parser.parse()
     assert set(parser.main_sheet) == set([])
-    assert set(parser.sub_sheets) == set(['Atest'])
-    assert list(parser.sub_sheets['Atest']) == ['Atest/0/Btest']
+    assert set(parser.sub_sheets) == set(["Atest"])
+    assert list(parser.sub_sheets["Atest"]) == ["Atest/0/Btest"]


-@pytest.mark.parametrize('use_titles', [True, False])
+@pytest.mark.parametrize("use_titles", [True, False])
 def test_use_titles(recwarn, use_titles):
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'title': 'ATitle',
-                'type': 'array',
-                'items': {
-                    'type': 'object',
-                    'properties': {
-                        'Btest': {
-                            'type': 'string',
-                            'title': 'BTitle'
-                        }
-                    }
-                }
-            },
-            'Ctest': {
-                'type': 'string',
-                'title': 'CTitle'
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "title": "ATitle",
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {"Btest": {"type": "string", "title": "BTitle"}},
+                    },
+                },
+                "Ctest": {"type": "string", "title": "CTitle"},
             }
-        }
-    }, use_titles=use_titles)
+        },
+        use_titles=use_titles,
+    )
     parser.parse()
     assert len(recwarn) == 0
     if use_titles:
-        assert set(parser.main_sheet) == set(['CTitle'])
-        assert set(parser.sub_sheets) == set(['Atest'])
-        assert list(parser.sub_sheets['Atest']) == ['ATitle:BTitle']
+        assert set(parser.main_sheet) == set(["CTitle"])
+        assert set(parser.sub_sheets) == set(["Atest"])
+        assert list(parser.sub_sheets["Atest"]) == ["ATitle:BTitle"]

     # Array title missing
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'type': 'array',
-                'items': {
-                    'type': 'object',
-                    'properties': {
-                        'Btest': {
-                            'type': 'string',
-                            'title': 'BTitle'
-                        }
-                    }
-                }
-            },
-            'Ctest': {
-                'type': 'string',
-                'title': 'CTitle'
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {"Btest": {"type": "string", "title": "BTitle"}},
+                    },
+                },
+                "Ctest": {"type": "string", "title": "CTitle"},
             }
-        }
-    }, use_titles=use_titles)
+        },
+        use_titles=use_titles,
+    )
     parser.parse()
     if use_titles:
-        assert set(parser.main_sheet) == set(['CTitle'])
-        assert set(parser.sub_sheets) == set(['Atest'])
-        assert list(parser.sub_sheets['Atest']) == []
+        assert set(parser.main_sheet) == set(["CTitle"])
+        assert set(parser.sub_sheets) == set(["Atest"])
+        assert list(parser.sub_sheets["Atest"]) == []
         assert len(recwarn) == 1
         w = recwarn.pop(UserWarning)
-        assert 'Field Atest does not have a title' in str(w.message)
+        assert "Field Atest does not have a title" in str(w.message)
     else:
         assert len(recwarn) == 0

     # Object containing array title missing
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Xtest': {
-                'type': 'object',
-                'properties': {
-                    'Atest': {
-                        'type': 'array',
-                        'title': 'ATitle',
-                        'items': {
-                            'type': 'object',
-                            'properties': {
-                                'Btest': {
-                                    'type': 'string',
-                                    'title': 'BTitle'
-                                }
-                            }
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Xtest": {
+                    "type": "object",
+                    "properties": {
+                        "Atest": {
+                            "type": "array",
+                            "title": "ATitle",
+                            "items": {
+                                "type": "object",
+                                "properties": {
+                                    "Btest": {"type": "string", "title": "BTitle"}
+                                },
+                            },
                         }
-                    }
-                }
-            },
-            'Ctest': {
-                'type': 'string',
-                'title': 'CTitle'
+                    },
+                },
+                "Ctest": {"type": "string", "title": "CTitle"},
             }
-        }
-    }, use_titles=use_titles)
+        },
+        use_titles=use_titles,
+    )
     parser.parse()
     if use_titles:
-        assert set(parser.main_sheet) == set(['CTitle'])
-        assert set(parser.sub_sheets) == set(['Xte_Atest'])
-        assert list(parser.sub_sheets['Xte_Atest']) == []
+        assert set(parser.main_sheet) == set(["CTitle"])
+        assert set(parser.sub_sheets) == set(["Xte_Atest"])
+        assert list(parser.sub_sheets["Xte_Atest"]) == []
         assert len(recwarn) == 1
         w = recwarn.pop(UserWarning)
-        assert 'Field Xtest/Atest/0/Btest is missing a title' in str(w.message)
+        assert "Field Xtest/Atest/0/Btest is missing a title" in str(w.message)
     else:
         assert len(recwarn) == 0

-@pytest.mark.parametrize('use_titles', [True, False])
+
+@pytest.mark.parametrize("use_titles", [True, False])
 def test_use_titles3(recwarn, use_titles):
     # Array containing a nested object title missing
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'type': 'array',
-                'title': 'ATitle',
-                'items': {
-                    'type': 'object',
-                    'properties': {
-                        'Btest': {
-                            'type': 'object',
-                            'properties': {
-                                'Ctest': {
-                                    'type': 'string',
-                                    'title': 'CTitle'
-                                }
-                            }
-                        }
-                    }
-                }
-            },
-            'Ctest': {
-                'type': 'string',
-                'title': 'CTitle'
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "type": "array",
+                    "title": "ATitle",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "Btest": {
+                                "type": "object",
+                                "properties": {
+                                    "Ctest": {"type": "string", "title": "CTitle"}
+                                },
+                            }
+                        },
+                    },
+                },
+                "Ctest": {"type": "string", "title": "CTitle"},
             }
-        }
-    }, use_titles=use_titles)
+        },
+        use_titles=use_titles,
+    )
     parser.parse()
     if use_titles:
-        assert set(parser.main_sheet) == set(['CTitle'])
-        assert set(parser.sub_sheets) == set(['Atest'])
-        assert list(parser.sub_sheets['Atest']) == []
+        assert set(parser.main_sheet) == set(["CTitle"])
+        assert set(parser.sub_sheets) == set(["Atest"])
+        assert list(parser.sub_sheets["Atest"]) == []
         assert len(recwarn) == 1
         w = recwarn.pop(UserWarning)
-        assert 'Field Atest/0/Btest/Ctest is missing a title' in str(w.message)
+        assert "Field Atest/0/Btest/Ctest is missing a title" in str(w.message)
     else:
         assert len(recwarn) == 0

-@pytest.mark.parametrize('use_titles', [True, False])
+
+@pytest.mark.parametrize("use_titles", [True, False])
 def test_use_titles2(recwarn, use_titles):
     # Object containing object title missing
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Xtest': {
-                'type': 'object',
-                'properties': {
-                    'Atest': {
-                        'type': 'object',
-                        'title': 'ATitle',
-                        'properties': {
-                            'Btest': {
-                                'type': 'string',
-                                'title': 'BTitle'
-                            }
-                        }
-                    }
-                }
-            },
-            'Ctest': {
-                'type': 'string',
-                'title': 'CTitle'
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Xtest": {
+                    "type": "object",
+                    "properties": {
+                        "Atest": {
+                            "type": "object",
+                            "title": "ATitle",
+                            "properties": {
+                                "Btest": {"type": "string", "title": "BTitle"}
+                            },
+                        }
+                    },
+                },
+                "Ctest": {"type": "string", "title": "CTitle"},
             }
-        }
-    }, use_titles=use_titles)
+        },
+        use_titles=use_titles,
+    )
     parser.parse()
     if use_titles:
-        assert set(parser.main_sheet) == set(['CTitle'])
+        assert set(parser.main_sheet) == set(["CTitle"])
         assert set(parser.sub_sheets) == set([])
         assert len(recwarn) == 1
         w = recwarn.pop(UserWarning)
-        assert 'Field Xtest/Atest/Btest does not have a title, skipping' in str(w.message)
+        assert "Field Xtest/Atest/Btest does not have a title, skipping" in str(
+            w.message
+        )
     else:
         assert len(recwarn) == 0

     # Main sheet title missing
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'title': 'ATitle',
-                'type': 'array',
-                'items': {
-                    'type': 'object',
-                    'properties': {
-                        'Btest': {
-                            'type': 'string',
-                            'title': 'BTitle'
-                        }
-                    }
-                }
-            },
-            'Ctest': {
-                'type': 'string'
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "title": "ATitle",
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {"Btest": {"type": "string", "title": "BTitle"}},
+                    },
+                },
+                "Ctest": {"type": "string"},
             }
-        }
-    }, use_titles=use_titles)
+        },
+        use_titles=use_titles,
+    )
     parser.parse()
     if use_titles:
         assert set(parser.main_sheet) == set([])
-        assert set(parser.sub_sheets) == set(['Atest'])
-        assert list(parser.sub_sheets['Atest']) == ['ATitle:BTitle']
+        assert set(parser.sub_sheets) == set(["Atest"])
+        assert list(parser.sub_sheets["Atest"]) == ["ATitle:BTitle"]
         assert len(recwarn) == 1
         w = recwarn.pop(UserWarning)
-        assert 'Field Ctest does not have a title' in str(w.message)
+        assert "Field Ctest does not have a title" in str(w.message)
     else:
         assert len(recwarn) == 0


 def test_use_titles5(recwarn):
     # Child sheet title missing
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'title': 'ATitle',
-                'type': 'array',
-                'items': {
-                    'type': 'object',
-                    'properties': {
-                        'Btest': {
-                            'type': 'string'
-                        }
-                    }
-                }
-            },
-            'Ctest': {
-                'type': 'string',
-                'title': 'CTitle'
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "title": "ATitle",
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {"Btest": {"type": "string"}},
+                    },
+                },
+                "Ctest": {"type": "string", "title": "CTitle"},
             }
-        }
-    }, use_titles=True)
+        },
+        use_titles=True,
+    )
     parser.parse()
-    assert set(parser.main_sheet) == set(['CTitle'])
-    assert set(parser.sub_sheets) == set(['Atest'])
-    assert list(parser.sub_sheets['Atest']) == []
+    assert set(parser.main_sheet) == set(["CTitle"])
+    assert set(parser.sub_sheets) == set(["Atest"])
+    assert list(parser.sub_sheets["Atest"]) == []
     w = recwarn.pop(UserWarning)
-    assert 'Field Atest/0/Btest is missing a title' in str(w.message)
+    assert "Field Atest/0/Btest is missing a title" in str(w.message)


 def test_titles_rollup():
-    parser = SchemaParser(root_schema_dict={
-        'properties': {
-            'Atest': {
-                'type': 'array',
-                'title': 'ATitle',
-                'rollUp': [ 'Btest' ],
-                'items': {
-                    'type': 'object',
-                    'properties': {
-                        'Btest': {
-                            'type': 'string',
-                            'title': 'BTitle',
+    parser = SchemaParser(
+        root_schema_dict={
+            "properties": {
+                "Atest": {
+                    "type": "array",
+                    "title": "ATitle",
+                    "rollUp": ["Btest"],
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "Btest": {"type": "string", "title": "BTitle",},
+                            "Ctest": {"type": "string", "title": "CTitle",},
                         },
-                        'Ctest': {
-                            'type': 'string',
-                            'title': 'CTitle',
-                        }
-                    }
-                }
-            },
-        }
-    }, rollup=True, use_titles=True)
+                    },
+                },
+            }
+        },
+        rollup=True,
+        use_titles=True,
+    )
     parser.parse()
-    assert set(parser.main_sheet) == set(['ATitle:BTitle'])
-    assert set(parser.sub_sheets) == set(['Atest'])
-    assert set(parser.sub_sheets['Atest']) == set(['ATitle:BTitle', 'ATitle:CTitle'])
+    assert set(parser.main_sheet) == set(["ATitle:BTitle"])
+    assert set(parser.sub_sheets) == set(["Atest"])
+    assert set(parser.sub_sheets["Atest"]) == set(["ATitle:BTitle", "ATitle:CTitle"])


 def test_schema_from_uri(httpserver):
     httpserver.serve_content('{"a":{"$ref":"#/b"}, "b":"c"}', 404)
     parser = SchemaParser(schema_filename=httpserver.url)
-    assert parser.root_schema_dict['a'] == 'c'
+    assert parser.root_schema_dict["a"] == "c"


 test_json_loader_local_refs_disabled_is_ref_local_data_returns_true = [
-    ( "file:///home/odsc/work/flatten-tool/examples/create-template/refs/definitions.json#/definition/address" ),
-    ( "definitions.json#/definition/address" ),
+    (
+        "file:///home/odsc/work/flatten-tool/examples/create-template/refs/definitions.json#/definition/address"
+    ),
+    ("definitions.json#/definition/address"),
 ]


-@pytest.mark.parametrize("data", test_json_loader_local_refs_disabled_is_ref_local_data_returns_true)
+@pytest.mark.parametrize(
+    "data", test_json_loader_local_refs_disabled_is_ref_local_data_returns_true
+)
 def test_json_loader_local_refs_disabled_is_ref_local_true(data):
     assert True == JsonLoaderLocalRefsDisabled().is_ref_local(data)

+
 test_json_loader_local_refs_disabled_is_ref_local_data_returns_false = [
-    ( "https://raw.githubusercontent.com/openownership/data-standard/master/schema/beneficial-ownership-statements.json" ),
-    ( "http://raw.githubusercontent.com/openownership/data-standard/master/schema/beneficial-ownership-statements.json" ),
+    (
+        "https://raw.githubusercontent.com/openownership/data-standard/master/schema/beneficial-ownership-statements.json"
+    ),
+    (
+        "http://raw.githubusercontent.com/openownership/data-standard/master/schema/beneficial-ownership-statements.json"
+    ),
 ]


-@pytest.mark.parametrize("data", test_json_loader_local_refs_disabled_is_ref_local_data_returns_false)
-def test_json_loader_local_refs_disabled_is_ref_local_true(data):
+@pytest.mark.parametrize(  # noqa
+    "data", test_json_loader_local_refs_disabled_is_ref_local_data_returns_false
+)
+def test_json_loader_local_refs_disabled_is_ref_local_true(data):  # noqa
     assert False == JsonLoaderLocalRefsDisabled().is_ref_local(data)
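Editor's note, not part of the diff: every test in test_schema_parser.py above follows the same flow — build a SchemaParser from an in-memory root_schema_dict, call parse(), then inspect main_sheet and sub_sheets. A minimal sketch of that flow, mirroring calls that appear verbatim in the hunks (the schema content is illustrative):

    from flattentool.schema import SchemaParser

    parser = SchemaParser(
        root_schema_dict={
            "properties": {
                "Atest": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {"Btest": {"type": "string"}},
                    },
                }
            }
        },
        root_id="custom",
    )
    parser.parse()
    # Arrays become sub-sheets; columns are JSON-pointer-style paths.
    assert set(parser.sub_sheets) == set(["Atest"])
    assert list(parser.sub_sheets["Atest"]) == ["custom", "Atest/0/Btest"]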
diff --git a/flattentool/tests/test_unflatten.py b/flattentool/tests/test_unflatten.py
index e9b57604..5e7b9c02 100644
--- a/flattentool/tests/test_unflatten.py
+++ b/flattentool/tests/test_unflatten.py
@@ -1,110 +1,128 @@
-import os
 import json
+import os
+
 import pytest

 from flattentool import unflatten

+
 def test_360_main_sheetname_insensitive(tmpdir):
-    input_name = 'flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants.xlsx'
+    input_name = "flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants.xlsx"
     unflatten(
         input_name=input_name,
-        output_name=tmpdir.join('output_grant.json').strpath,
-        input_format='xlsx',
-        schema='flattentool/tests/fixtures/360-giving-schema.json',
-        main_sheet_name='grants',
-        root_list_path='grants',
-        root_id='',
-        convert_titles=True)
-    output_json_grants = json.load(tmpdir.join('output_grant.json'))
-
-    input_name = 'flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants_sheet_title_case.xlsx'
+        output_name=tmpdir.join("output_grant.json").strpath,
+        input_format="xlsx",
+        schema="flattentool/tests/fixtures/360-giving-schema.json",
+        main_sheet_name="grants",
+        root_list_path="grants",
+        root_id="",
+        convert_titles=True,
+    )
+    output_json_grants = json.load(tmpdir.join("output_grant.json"))
+
+    input_name = "flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants_sheet_title_case.xlsx"
     unflatten(
         input_name=input_name,
-        output_name=tmpdir.join('output_grant_sheet_title_case.json').strpath,
-        input_format='xlsx',
-        schema='flattentool/tests/fixtures/360-giving-schema.json',
-        main_sheet_name='grants',
-        root_list_path='grants',
-        root_id='',
-        convert_titles=True)
-    output_json_Grants = json.load(tmpdir.join('output_grant_sheet_title_case.json'))
+        output_name=tmpdir.join("output_grant_sheet_title_case.json").strpath,
+        input_format="xlsx",
+        schema="flattentool/tests/fixtures/360-giving-schema.json",
+        main_sheet_name="grants",
+        root_list_path="grants",
+        root_id="",
+        convert_titles=True,
+    )
+    output_json_Grants = json.load(tmpdir.join("output_grant_sheet_title_case.json"))

     assert output_json_grants == output_json_Grants

+
 def test_360_fields_case_insensitive(tmpdir):
-    input_name = 'flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants.xlsx'
+    input_name = "flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants.xlsx"
     unflatten(
         input_name=input_name,
-        output_name=tmpdir.join('output_grant.json').strpath,
-        input_format='xlsx',
-        schema='flattentool/tests/fixtures/360-giving-schema.json',
-        main_sheet_name='grants',
-        root_list_path='grants',
-        root_id='',
-        convert_titles=True)
-    output_json_grants = json.load(tmpdir.join('output_grant.json'))
-
-    input_name = 'flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants_title_space_case.xlsx'
+        output_name=tmpdir.join("output_grant.json").strpath,
+        input_format="xlsx",
+        schema="flattentool/tests/fixtures/360-giving-schema.json",
+        main_sheet_name="grants",
+        root_list_path="grants",
+        root_id="",
+        convert_titles=True,
+    )
+    output_json_grants = json.load(tmpdir.join("output_grant.json"))
+
+    input_name = "flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants_title_space_case.xlsx"
     unflatten(
         input_name=input_name,
-        output_name=tmpdir.join('output_space_case.json').strpath,
-        input_format='xlsx',
-        schema='flattentool/tests/fixtures/360-giving-schema.json',
-        main_sheet_name='grants',
-        root_list_path='grants',
-        root_id='',
-        convert_titles=True)
-    output_json_space_case = json.load(tmpdir.join('output_space_case.json'))
+        output_name=tmpdir.join("output_space_case.json").strpath,
+        input_format="xlsx",
+        schema="flattentool/tests/fixtures/360-giving-schema.json",
+        main_sheet_name="grants",
+        root_list_path="grants",
+        root_id="",
+        convert_titles=True,
+    )
+    output_json_space_case = json.load(tmpdir.join("output_space_case.json"))

     assert output_json_grants == output_json_space_case


-@pytest.mark.parametrize('dirname', ['examples/iati', 'examples/iati_multilang'])
+@pytest.mark.parametrize("dirname", ["examples/iati", "examples/iati_multilang"])
 def test_unflatten_xml(tmpdir, dirname):
-    schema_path = 'examples/iati'
-    schemas = ['iati-activities-schema.xsd', 'iati-common.xsd']
-    schema_filepaths = ['{}/{}'.format(schema_path, schema) for schema in schemas]
+    schema_path = "examples/iati"
+    schemas = ["iati-activities-schema.xsd", "iati-common.xsd"]
+    schema_filepaths = ["{}/{}".format(schema_path, schema) for schema in schemas]
     unflatten(
         input_name=dirname,
-        output_name=tmpdir.join('output.xml').strpath,
-        input_format='csv',
-        root_list_path='iati-activity',
-        id_name='iati-identifier',
+        output_name=tmpdir.join("output.xml").strpath,
+        input_format="csv",
+        root_list_path="iati-activity",
+        id_name="iati-identifier",
         xml=True,
         xml_schemas=schema_filepaths,
-        )
-    assert open(os.path.join(dirname, 'expected.xml')).read() == tmpdir.join('output.xml').read()
+    )
+    assert (
+        open(os.path.join(dirname, "expected.xml")).read()
+        == tmpdir.join("output.xml").read()
+    )


-@pytest.mark.parametrize('dirname', ['examples/iati_xml_comment'])
+@pytest.mark.parametrize("dirname", ["examples/iati_xml_comment"])
 def test_unflatten_xml_comment(tmpdir, dirname):
     """ Edit default xml comment 'XML generated by flatten-tool'
     by 'XML generated by ODS' """
-    schema_path = 'examples/iati'
-    schemas = ['iati-activities-schema.xsd', 'iati-common.xsd']
-    schema_filepaths = ['{}/{}'.format(schema_path, schema) for schema in schemas]
+    schema_path = "examples/iati"
+    schemas = ["iati-activities-schema.xsd", "iati-common.xsd"]
+    schema_filepaths = ["{}/{}".format(schema_path, schema) for schema in schemas]
     unflatten(
         input_name=dirname,
-        output_name=tmpdir.join('output.xml').strpath,
-        input_format='csv',
-        root_list_path='iati-activity',
-        id_name='iati-identifier',
+        output_name=tmpdir.join("output.xml").strpath,
+        input_format="csv",
+        root_list_path="iati-activity",
+        id_name="iati-identifier",
         xml=True,
         xml_schemas=schema_filepaths,
-        xml_comment='XML generated by ODS'
-        )
-    assert open(os.path.join(dirname, 'expected.xml')).read() == tmpdir.join('output.xml').read()
+        xml_comment="XML generated by ODS",
+    )
+    assert (
+        open(os.path.join(dirname, "expected.xml")).read()
+        == tmpdir.join("output.xml").read()
+    )


-@pytest.mark.parametrize('input_format', ['xlsx', 'ods'])
+@pytest.mark.parametrize("input_format", ["xlsx", "ods"])
 def test_unflatten_org_xml_xlsx(tmpdir, input_format):
     unflatten(
-        input_name='flattentool/tests/fixtures/{}/iati-org.{}'.format(input_format, input_format),
-        output_name=tmpdir.join('output.xml').strpath,
+        input_name="flattentool/tests/fixtures/{}/iati-org.{}".format(
+            input_format, input_format
+        ),
+        output_name=tmpdir.join("output.xml").strpath,
         input_format=input_format,
-        id_name='organisation-identifier',
+        id_name="organisation-identifier",
         xml=True,
-        metatab_name='Meta'
-        )
-    assert open('flattentool/tests/fixtures/iati-org.xml').read() == tmpdir.join('output.xml').read()
+        metatab_name="Meta",
+    )
+    assert (
open("flattentool/tests/fixtures/iati-org.xml").read() + == tmpdir.join("output.xml").read() + ) diff --git a/flattentool/tests/test_xml_input.py b/flattentool/tests/test_xml_input.py index b26a0711..4ab90784 100644 --- a/flattentool/tests/test_xml_input.py +++ b/flattentool/tests/test_xml_input.py @@ -1,14 +1,20 @@ -from flattentool.json_input import JSONParser -from flattentool.json_input import lists_of_dicts_paths, dicts_to_list_of_dicts, list_dict_consistency +from flattentool.json_input import ( + JSONParser, + dicts_to_list_of_dicts, + list_dict_consistency, + lists_of_dicts_paths, +) + def test_xml_empty(): parser = JSONParser( - json_filename='flattentool/tests/fixtures/empty.xml', - root_list_path='iati-activity', + json_filename="flattentool/tests/fixtures/empty.xml", + root_list_path="iati-activity", schema_parser=None, - root_id='', + root_id="", xml=True, - id_name='iati-identifier') + id_name="iati-identifier", + ) parser.parse() assert list(parser.main_sheet) == [] assert parser.main_sheet.lines == [] @@ -17,125 +23,242 @@ def test_xml_empty(): def test_xml_basic_example(): parser = JSONParser( - json_filename='examples/iati/expected.xml', - root_list_path='iati-activity', + json_filename="examples/iati/expected.xml", + root_list_path="iati-activity", schema_parser=None, - root_id='', + root_id="", xml=True, - id_name='iati-identifier') + id_name="iati-identifier", + ) parser.parse() - assert list(parser.main_sheet) == ['iati-identifier', 'reporting-org/@ref', 'reporting-org/@type', 'reporting-org/narrative', 'title/narrative', 'description/narrative', 'participating-org/@ref', 'participating-org/@role', 'activity-status/@code', 'activity-date/@iso-date', 'activity-date/@type'] + assert list(parser.main_sheet) == [ + "iati-identifier", + "reporting-org/@ref", + "reporting-org/@type", + "reporting-org/narrative", + "title/narrative", + "description/narrative", + "participating-org/@ref", + "participating-org/@role", + "activity-status/@code", + "activity-date/@iso-date", + "activity-date/@type", + ] assert parser.main_sheet.lines == [ - {'activity-date/@type': '1', 'reporting-org/narrative': 'Organisation name', 'participating-org/@ref': 'AA-AAA-123456789', 'title/narrative': 'A title', 'participating-org/@role': '1', 'reporting-org/@ref': 'AA-AAA-123456789', 'iati-identifier': 'AA-AAA-123456789-ABC123', 'reporting-org/@type': '40', 'description/narrative': 'A description', 'activity-date/@iso-date': '2011-10-01', 'activity-status/@code': '2'}, - {'activity-date/@type': '2', 'reporting-org/narrative': 'Organisation name', 'participating-org/@ref': 'AA-AAA-123456789', 'title/narrative': 'Another title', 'participating-org/@role': '1', 'reporting-org/@ref': 'AA-AAA-123456789', 'iati-identifier': 'AA-AAA-123456789-ABC124', 'reporting-org/@type': '40', 'description/narrative': 'Another description', 'activity-date/@iso-date': '2016-01-01', 'activity-status/@code': '3'} + { + "activity-date/@type": "1", + "reporting-org/narrative": "Organisation name", + "participating-org/@ref": "AA-AAA-123456789", + "title/narrative": "A title", + "participating-org/@role": "1", + "reporting-org/@ref": "AA-AAA-123456789", + "iati-identifier": "AA-AAA-123456789-ABC123", + "reporting-org/@type": "40", + "description/narrative": "A description", + "activity-date/@iso-date": "2011-10-01", + "activity-status/@code": "2", + }, + { + "activity-date/@type": "2", + "reporting-org/narrative": "Organisation name", + "participating-org/@ref": "AA-AAA-123456789", + "title/narrative": "Another title", + 
"participating-org/@role": "1", + "reporting-org/@ref": "AA-AAA-123456789", + "iati-identifier": "AA-AAA-123456789-ABC124", + "reporting-org/@type": "40", + "description/narrative": "Another description", + "activity-date/@iso-date": "2016-01-01", + "activity-status/@code": "3", + }, + ] + assert set(parser.sub_sheets.keys()) == set(["transaction", "recipient-country"]) + assert list(parser.sub_sheets["transaction"]) == [ + "iati-identifier", + "transaction/0/transaction-type/@code", + "transaction/0/transaction-date/@iso-date", + "transaction/0/value/@value-date", + "transaction/0/value", + ] + assert parser.sub_sheets["transaction"].lines == [ + { + "transaction/0/value/@value-date": "2012-01-01", + "iati-identifier": "AA-AAA-123456789-ABC123", + "transaction/0/transaction-date/@iso-date": "2012-01-01", + "transaction/0/value": "10", + "transaction/0/transaction-type/@code": "2", + }, + { + "transaction/0/value/@value-date": "2012-03-03", + "iati-identifier": "AA-AAA-123456789-ABC123", + "transaction/0/transaction-date/@iso-date": "2012-03-03", + "transaction/0/value": "20", + "transaction/0/transaction-type/@code": "3", + }, + { + "transaction/0/value/@value-date": "2013-04-04", + "iati-identifier": "AA-AAA-123456789-ABC124", + "transaction/0/transaction-date/@iso-date": "2013-04-04", + "transaction/0/value": "30", + "transaction/0/transaction-type/@code": "2", + }, + { + "transaction/0/value/@value-date": "2013-05-05", + "iati-identifier": "AA-AAA-123456789-ABC124", + "transaction/0/transaction-date/@iso-date": "2013-05-05", + "transaction/0/value": "40", + "transaction/0/transaction-type/@code": "3", + }, ] - assert set(parser.sub_sheets.keys()) == set(['transaction', 'recipient-country']) - assert list(parser.sub_sheets['transaction']) == ['iati-identifier', 'transaction/0/transaction-type/@code', 'transaction/0/transaction-date/@iso-date', 'transaction/0/value/@value-date', 'transaction/0/value'] - assert parser.sub_sheets['transaction'].lines == [ - {'transaction/0/value/@value-date': '2012-01-01', 'iati-identifier': 'AA-AAA-123456789-ABC123', 'transaction/0/transaction-date/@iso-date': '2012-01-01', 'transaction/0/value': '10', 'transaction/0/transaction-type/@code': '2'}, - {'transaction/0/value/@value-date': '2012-03-03', 'iati-identifier': 'AA-AAA-123456789-ABC123', 'transaction/0/transaction-date/@iso-date': '2012-03-03', 'transaction/0/value': '20', 'transaction/0/transaction-type/@code': '3'}, - {'transaction/0/value/@value-date': '2013-04-04', 'iati-identifier': 'AA-AAA-123456789-ABC124', 'transaction/0/transaction-date/@iso-date': '2013-04-04', 'transaction/0/value': '30', 'transaction/0/transaction-type/@code': '2'}, - {'transaction/0/value/@value-date': '2013-05-05', 'iati-identifier': 'AA-AAA-123456789-ABC124', 'transaction/0/transaction-date/@iso-date': '2013-05-05', 'transaction/0/value': '40', 'transaction/0/transaction-type/@code': '3'} + assert list(parser.sub_sheets["recipient-country"]) == [ + "iati-identifier", + "recipient-country/0/@code", + "recipient-country/0/@percentage", ] - assert list(parser.sub_sheets['recipient-country']) == ['iati-identifier', 'recipient-country/0/@code', 'recipient-country/0/@percentage'] - assert parser.sub_sheets['recipient-country'].lines == [ - {'iati-identifier': 'AA-AAA-123456789-ABC123', 'recipient-country/0/@code': 'AF', 'recipient-country/0/@percentage': '40'}, - {'iati-identifier': 'AA-AAA-123456789-ABC123', 'recipient-country/0/@code': 'XK', 'recipient-country/0/@percentage': '60'}, - {'iati-identifier': 
'AA-AAA-123456789-ABC124', 'recipient-country/0/@code': 'AG', 'recipient-country/0/@percentage': '30'}, - {'iati-identifier': 'AA-AAA-123456789-ABC124', 'recipient-country/0/@code': 'XK', 'recipient-country/0/@percentage': '70'} + assert parser.sub_sheets["recipient-country"].lines == [ + { + "iati-identifier": "AA-AAA-123456789-ABC123", + "recipient-country/0/@code": "AF", + "recipient-country/0/@percentage": "40", + }, + { + "iati-identifier": "AA-AAA-123456789-ABC123", + "recipient-country/0/@code": "XK", + "recipient-country/0/@percentage": "60", + }, + { + "iati-identifier": "AA-AAA-123456789-ABC124", + "recipient-country/0/@code": "AG", + "recipient-country/0/@percentage": "30", + }, + { + "iati-identifier": "AA-AAA-123456789-ABC124", + "recipient-country/0/@code": "XK", + "recipient-country/0/@percentage": "70", + }, ] def test_varyin_transaction_count(): parser = JSONParser( - json_filename='flattentool/tests/fixtures/varying_transaction_count.xml', - root_list_path='iati-activity', + json_filename="flattentool/tests/fixtures/varying_transaction_count.xml", + root_list_path="iati-activity", schema_parser=None, - root_id='', + root_id="", xml=True, - id_name='iati-identifier') + id_name="iati-identifier", + ) parser.parse() - assert list(parser.main_sheet) == ['iati-identifier'] + assert list(parser.main_sheet) == ["iati-identifier"] assert parser.main_sheet.lines == [ - {'iati-identifier': 'AA-AAA-123456789-ABC123'}, - {'iati-identifier': 'AA-AAA-123456789-ABC124'}, - {'iati-identifier': 'AA-AAA-123456789-ABC125'}, + {"iati-identifier": "AA-AAA-123456789-ABC123"}, + {"iati-identifier": "AA-AAA-123456789-ABC124"}, + {"iati-identifier": "AA-AAA-123456789-ABC125"}, ] - assert set(parser.sub_sheets.keys()) == set(['transaction']) - assert list(parser.sub_sheets['transaction']) == ['iati-identifier', 'transaction/0/transaction-date/@iso-date', 'transaction/0/value/@value-date', 'transaction/0/value'] - assert parser.sub_sheets['transaction'].lines == [ - {'iati-identifier': 'AA-AAA-123456789-ABC123', 'transaction/0/value/@value-date': '2012-01-01', 'transaction/0/transaction-date/@iso-date': '2012-01-01', 'transaction/0/value': '10'}, - {'iati-identifier': 'AA-AAA-123456789-ABC123', 'transaction/0/value/@value-date': '2012-02-02', 'transaction/0/transaction-date/@iso-date': '2012-02-02', 'transaction/0/value': '20'}, - {'iati-identifier': 'AA-AAA-123456789-ABC125', 'transaction/0/value/@value-date': '2012-03-03', 'transaction/0/transaction-date/@iso-date': '2012-03-03', 'transaction/0/value': '30'}, + assert set(parser.sub_sheets.keys()) == set(["transaction"]) + assert list(parser.sub_sheets["transaction"]) == [ + "iati-identifier", + "transaction/0/transaction-date/@iso-date", + "transaction/0/value/@value-date", + "transaction/0/value", + ] + assert parser.sub_sheets["transaction"].lines == [ + { + "iati-identifier": "AA-AAA-123456789-ABC123", + "transaction/0/value/@value-date": "2012-01-01", + "transaction/0/transaction-date/@iso-date": "2012-01-01", + "transaction/0/value": "10", + }, + { + "iati-identifier": "AA-AAA-123456789-ABC123", + "transaction/0/value/@value-date": "2012-02-02", + "transaction/0/transaction-date/@iso-date": "2012-02-02", + "transaction/0/value": "20", + }, + { + "iati-identifier": "AA-AAA-123456789-ABC125", + "transaction/0/value/@value-date": "2012-03-03", + "transaction/0/transaction-date/@iso-date": "2012-03-03", + "transaction/0/value": "30", + }, ] def test_lists_of_dicts_paths(): assert list(lists_of_dicts_paths({})) == [] - assert 
list(lists_of_dicts_paths({'a': [{}]})) == [('a',)] - assert list(lists_of_dicts_paths({'a': [{'d': 'str1'}]})) == [('a',)] - assert list(lists_of_dicts_paths({'a': [{'b': [{'d': 'str1'}]}]})) == [('a',), ('a', 'b')] - assert list(lists_of_dicts_paths({'a': [{'b': {'d': 'str1'}}]})) == [('a',)] - assert list(lists_of_dicts_paths({'a': [{'b': {'d': 'str1'}}, {'b': [{}]}]})) == [('a',), ('a', 'b')] - assert list(lists_of_dicts_paths({'a': {'b': {'c': [{'d': 'str1'}]}}})) == [('a', 'b', 'c')] + assert list(lists_of_dicts_paths({"a": [{}]})) == [("a",)] + assert list(lists_of_dicts_paths({"a": [{"d": "str1"}]})) == [("a",)] + assert list(lists_of_dicts_paths({"a": [{"b": [{"d": "str1"}]}]})) == [ + ("a",), + ("a", "b"), + ] + assert list(lists_of_dicts_paths({"a": [{"b": {"d": "str1"}}]})) == [("a",)] + assert list(lists_of_dicts_paths({"a": [{"b": {"d": "str1"}}, {"b": [{}]}]})) == [ + ("a",), + ("a", "b"), + ] + assert list(lists_of_dicts_paths({"a": {"b": {"c": [{"d": "str1"}]}}})) == [ + ("a", "b", "c") + ] def test_dicts_to_list_of_dicts(): - xml_dict = {'a': {'b': {'c': {'d': 'aStr'}}}} - dicts_to_list_of_dicts({('x', 'y', 'z'), ('a', 'b', 'c')}, xml_dict) - assert xml_dict == {'a': {'b': {'c': [{'d': 'aStr'}]}}} + xml_dict = {"a": {"b": {"c": {"d": "aStr"}}}} + dicts_to_list_of_dicts({("x", "y", "z"), ("a", "b", "c")}, xml_dict) + assert xml_dict == {"a": {"b": {"c": [{"d": "aStr"}]}}} def test_list_dict_consistency(): - xml_dict = {'a': [{'b': {'d': 'str1'}}, {'b': {'d': 'str2'}}]} + xml_dict = {"a": [{"b": {"d": "str1"}}, {"b": {"d": "str2"}}]} list_dict_consistency(xml_dict) - assert xml_dict == {'a': [{'b': {'d': 'str1'}}, {'b': {'d': 'str2'}}]} + assert xml_dict == {"a": [{"b": {"d": "str1"}}, {"b": {"d": "str2"}}]} - xml_dict = {'a': [{'b': {'d': 'str1'}}, {'b': [{'d': 'str2'}]}]} + xml_dict = {"a": [{"b": {"d": "str1"}}, {"b": [{"d": "str2"}]}]} list_dict_consistency(xml_dict) - assert xml_dict == {'a': [{'b': [{'d': 'str1'}]}, {'b': [{'d': 'str2'}]}]} + assert xml_dict == {"a": [{"b": [{"d": "str1"}]}, {"b": [{"d": "str2"}]}]} - xml_dict = {'a': [{'b': [{'d': 'str1'}]}, {'b': {'d': 'str2'}}]} + xml_dict = {"a": [{"b": [{"d": "str1"}]}, {"b": {"d": "str2"}}]} list_dict_consistency(xml_dict) - assert xml_dict == {'a': [{'b': [{'d': 'str1'}]}, {'b': [{'d': 'str2'}]}]} + assert xml_dict == {"a": [{"b": [{"d": "str1"}]}, {"b": [{"d": "str2"}]}]} # Wrapped in a dict - xml_dict = {'c': {'a': [{'b': {'d': 'str1'}}, {'b': {'d': 'str2'}}]}} + xml_dict = {"c": {"a": [{"b": {"d": "str1"}}, {"b": {"d": "str2"}}]}} list_dict_consistency(xml_dict) - assert xml_dict == {'c': {'a': [{'b': {'d': 'str1'}}, {'b': {'d': 'str2'}}]}} + assert xml_dict == {"c": {"a": [{"b": {"d": "str1"}}, {"b": {"d": "str2"}}]}} - xml_dict = {'c': {'a': [{'b': {'d': 'str1'}}, {'b': [{'d': 'str2'}]}]}} + xml_dict = {"c": {"a": [{"b": {"d": "str1"}}, {"b": [{"d": "str2"}]}]}} list_dict_consistency(xml_dict) - assert xml_dict == {'c': {'a': [{'b': [{'d': 'str1'}]}, {'b': [{'d': 'str2'}]}]}} + assert xml_dict == {"c": {"a": [{"b": [{"d": "str1"}]}, {"b": [{"d": "str2"}]}]}} - xml_dict = {'c': {'a': [{'b': [{'d': 'str1'}]}, {'b': {'d': 'str2'}}]}} + xml_dict = {"c": {"a": [{"b": [{"d": "str1"}]}, {"b": {"d": "str2"}}]}} list_dict_consistency(xml_dict) - assert xml_dict == {'c': {'a': [{'b': [{'d': 'str1'}]}, {'b': [{'d': 'str2'}]}]}} + assert xml_dict == {"c": {"a": [{"b": [{"d": "str1"}]}, {"b": [{"d": "str2"}]}]}} # Wrapped in a list of dicts - xml_dict = {'c': [{'a': [{'b': {'d': 'str1'}}, {'b': {'d': 'str2'}}]}]} + 
xml_dict = {"c": [{"a": [{"b": {"d": "str1"}}, {"b": {"d": "str2"}}]}]} list_dict_consistency(xml_dict) - assert xml_dict == {'c': [{'a': [{'b': {'d': 'str1'}}, {'b': {'d': 'str2'}}]}]} + assert xml_dict == {"c": [{"a": [{"b": {"d": "str1"}}, {"b": {"d": "str2"}}]}]} - xml_dict = {'c': [{'a': [{'b': {'d': 'str1'}}, {'b': [{'d': 'str2'}]}]}]} + xml_dict = {"c": [{"a": [{"b": {"d": "str1"}}, {"b": [{"d": "str2"}]}]}]} list_dict_consistency(xml_dict) - assert xml_dict == {'c': [{'a': [{'b': [{'d': 'str1'}]}, {'b': [{'d': 'str2'}]}]}]} + assert xml_dict == {"c": [{"a": [{"b": [{"d": "str1"}]}, {"b": [{"d": "str2"}]}]}]} - xml_dict = {'c': [{'a': [{'b': [{'d': 'str1'}]}, {'b': {'d': 'str2'}}]}]} + xml_dict = {"c": [{"a": [{"b": [{"d": "str1"}]}, {"b": {"d": "str2"}}]}]} list_dict_consistency(xml_dict) - assert xml_dict == {'c': [{'a': [{'b': [{'d': 'str1'}]}, {'b': [{'d': 'str2'}]}]}]} + assert xml_dict == {"c": [{"a": [{"b": [{"d": "str1"}]}, {"b": [{"d": "str2"}]}]}]} def test_xml_whitespace(): parser = JSONParser( - json_filename='flattentool/tests/fixtures/narrative_whitespace.xml', - root_list_path='iati-activity', + json_filename="flattentool/tests/fixtures/narrative_whitespace.xml", + root_list_path="iati-activity", schema_parser=None, - root_id='', + root_id="", xml=True, - id_name='iati-identifier') + id_name="iati-identifier", + ) try: parser.parse() diff --git a/flattentool/xml_output.py b/flattentool/xml_output.py index 86da5e40..a689d14c 100644 --- a/flattentool/xml_output.py +++ b/flattentool/xml_output.py @@ -1,7 +1,12 @@ from collections import OrderedDict from warnings import warn + +from flattentool.exceptions import DataErrorWarning +from flattentool.sort_xml import XMLSchemaWalker, sort_element + try: import lxml.etree as ET + # If we're using lxml we have to do some extra work to support namespaces, # so we have a variable to check whether we're using lxml: USING_LXML = True @@ -10,46 +15,55 @@ # However, stdlib etree still exists as an unsupported feature. except ImportError: import xml.etree.ElementTree as ET + USING_LXML = False - warn('Using stdlib etree may work, but is not supported. Please install lxml.') -from flattentool.exceptions import DataErrorWarning -from flattentool.sort_xml import sort_element, XMLSchemaWalker + warn("Using stdlib etree may work, but is not supported. 
diff --git a/flattentool/xml_output.py b/flattentool/xml_output.py
index 86da5e40..a689d14c 100644
--- a/flattentool/xml_output.py
+++ b/flattentool/xml_output.py
@@ -1,7 +1,12 @@
 from collections import OrderedDict
 from warnings import warn
+
+from flattentool.exceptions import DataErrorWarning
+from flattentool.sort_xml import XMLSchemaWalker, sort_element
+
 try:
     import lxml.etree as ET
+
     # If we're using lxml we have to do some extra work to support namespaces,
     # so we have a variable to check whether we're using lxml:
     USING_LXML = True
@@ -10,46 +15,55 @@
 # However, stdlib etree still exists as an unsupported feature.
 except ImportError:
     import xml.etree.ElementTree as ET
+
     USING_LXML = False
-    warn('Using stdlib etree may work, but is not supported. Please install lxml.')

-from flattentool.exceptions import DataErrorWarning
-from flattentool.sort_xml import sort_element, XMLSchemaWalker
+    warn("Using stdlib etree may work, but is not supported. Please install lxml.")


 def sort_attributes(data):
     attribs = []
     other = []
     for k, v in data.items():
-        (other, attribs)[k.startswith('@')].append((k, v))
+        (other, attribs)[k.startswith("@")].append((k, v))
     return OrderedDict(sorted(attribs) + other)


 def child_to_xml(parent_el, tagname, child, toplevel=False, nsmap=None):
-    if hasattr(child, 'items'):
+    if hasattr(child, "items"):
         child_el = dict_to_xml(child, tagname, toplevel=False, nsmap=nsmap)
         if child_el is not None:
             parent_el.append(child_el)
     else:
-        if tagname.startswith('@'):
-            if USING_LXML and toplevel and tagname.startswith('@xmlns'):
-                nsmap[tagname[1:].split(':', 1)[1]] = str(child)
+        if tagname.startswith("@"):
+            if USING_LXML and toplevel and tagname.startswith("@xmlns"):
+                nsmap[tagname[1:].split(":", 1)[1]] = str(child)
                 return
             try:
                 attr_name = tagname[1:]
-                if USING_LXML and ':' in attr_name:
-                    attr_name = '{' + nsmap.get(attr_name.split(':', 1)[0], '') + '}' + attr_name.split(':', 1)[1]
+                if USING_LXML and ":" in attr_name:
+                    attr_name = (
+                        "{"
+                        + nsmap.get(attr_name.split(":", 1)[0], "")
+                        + "}"
+                        + attr_name.split(":", 1)[1]
+                    )
                 parent_el.attrib[attr_name] = str(child)
             except ValueError as e:
                 warn(str(e), DataErrorWarning)
-        elif tagname == 'text()':
+        elif tagname == "text()":
             parent_el.text = str(child)
         else:
-            raise('Everything should end with text() or an attribute!')
+            raise Exception("Everything should end with text() or an attribute!")


 def dict_to_xml(data, tagname, toplevel=True, nsmap=None):
-    if USING_LXML and ':' in tagname and not toplevel:
-        tagname = '{' + nsmap.get(tagname.split(':', 1)[0], '') + '}' + tagname.split(':', 1)[1]
+    if USING_LXML and ":" in tagname and not toplevel:
+        tagname = (
+            "{"
+            + nsmap.get(tagname.split(":", 1)[0], "")
+            + "}"
+            + tagname.split(":", 1)[1]
+        )
     try:
         if USING_LXML:
             el = ET.Element(tagname, nsmap=nsmap)
@@ -71,10 +85,16 @@ def dict_to_xml(data, tagname, toplevel=True, nsmap=None):
     return el


-def toxml(data, xml_root_tag, xml_schemas=None, root_list_path='iati-activity', xml_comment=None):
+def toxml(
+    data,
+    xml_root_tag,
+    xml_schemas=None,
+    root_list_path="iati-activity",
+    xml_comment=None,
+):
     nsmap = {
         # This is "bound by definition" - see https://www.w3.org/XML/1998/namespace
-        'xml': 'http://www.w3.org/XML/1998/namespace'
+        "xml": "http://www.w3.org/XML/1998/namespace"
     }
     root = dict_to_xml(data, xml_root_tag, nsmap=nsmap)
     if xml_schemas is not None:
@@ -82,10 +102,12 @@ def toxml(data, xml_root_tag, xml_schemas=None, root_list_path='iati-activity',
     for element in root:
         sort_element(element, schema_dict)
     if xml_comment is None:
-        xml_comment = 'XML generated by flatten-tool'
+        xml_comment = "XML generated by flatten-tool"
     comment = ET.Comment(xml_comment)
     root.insert(0, comment)
     if USING_LXML:
-        return ET.tostring(root, pretty_print=True, xml_declaration=True, encoding='utf-8')
+        return ET.tostring(
+            root, pretty_print=True, xml_declaration=True, encoding="utf-8"
+        )
     else:
         return ET.tostring(root)
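The module above encodes a small convention for turning flattened dicts into XML: a key beginning with "@" becomes an attribute (a top-level "@xmlns:..." key instead feeds the lxml nsmap), a "text()" key sets the element text, any other key becomes a child element, and prefixed names are expanded to Clark notation ("{namespace-uri}local-name") when lxml is available. A rough sketch of that mapping, with illustrative input only; the traversal of nested keys happens in the part of dict_to_xml the hunks above skip over:

    data = {
        "@code": "2",
        "narrative": {"text()": "A description"},
    }
    el = dict_to_xml(data, "transaction-type")
    # el should serialise to roughly:
    #   <transaction-type code="2">
    #     <narrative>A description</narrative>
    #   </transaction-type>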
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 1d7b1dda..00f956db 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -6,3 +6,5 @@ coveralls
 pytest-localserver
 sphinx
 sphinx_rtd_theme
+isort
+flake8
diff --git a/setup.cfg b/setup.cfg
index 791f075d..43fca987 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,2 +1,3 @@
 [flake8]
-max-line-length = 119
+# Ignore style checking, because black does this for us
+ignore = E,W
diff --git a/setup.py b/setup.py
index f2f81646..64f29816 100644
--- a/setup.py
+++ b/setup.py
@@ -1,20 +1,25 @@
 from setuptools import setup

-install_requires = ['jsonref', 'schema', 'openpyxl>=2.6,!=3.0.2', 'pytz',
-                    'xmltodict', 'lxml', 'odfpy']
+install_requires = [
+    "jsonref",
+    "schema",
+    "openpyxl>=2.6,!=3.0.2",
+    "pytz",
+    "xmltodict",
+    "lxml",
+    "odfpy",
+]

 setup(
-    name='flattentool',
-    version='0.11.0',
-    author='Open Data Services',
-    author_email='code@opendataservices.coop',
-    packages=['flattentool'],
-    scripts=['flatten-tool'],
-    url='https://github.com/OpenDataServices/flatten-tool',
-    license='MIT',
-    description='Tools for generating CSV and other flat versions of the structured data',
+    name="flattentool",
+    version="0.11.0",
+    author="Open Data Services",
+    author_email="code@opendataservices.coop",
+    packages=["flattentool"],
+    scripts=["flatten-tool"],
+    url="https://github.com/OpenDataServices/flatten-tool",
+    license="MIT",
+    description="Tools for generating CSV and other flat versions of the structured data",
     install_requires=install_requires,
-    extras_require = {
-        'HTTP': ['requests']
-    }
+    extras_require={"HTTP": ["requests"]},
 )
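A usage note on the final hunk: the extras_require entry (unchanged here apart from formatting) keeps requests an optional dependency, pulled in only when the HTTP extra is requested, for example:

    pip install 'flattentool[HTTP]'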