From b3183222be90a05368a7f6efb837044cac60b6a6 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Wed, 31 Jul 2024 12:59:50 +0000 Subject: [PATCH] WIP namespace unflattening --- examples/iati_namespaces/cmd.txt | 1 + examples/iati_namespaces/expected.xml | 60 +++++++++++++++++++ .../iati-activities-schema.xsd | 1 + examples/iati_namespaces/iati-common.xsd | 1 + .../iati-organisations-schema.xsd | 1 + examples/iati_namespaces/main.csv | 3 + examples/iati_namespaces/transactions.csv | 5 ++ flattentool/tests/test_docs.py | 2 +- flattentool/xml_output.py | 54 +++++++++++------ 9 files changed, 110 insertions(+), 18 deletions(-) create mode 100644 examples/iati_namespaces/cmd.txt create mode 100644 examples/iati_namespaces/expected.xml create mode 120000 examples/iati_namespaces/iati-activities-schema.xsd create mode 120000 examples/iati_namespaces/iati-common.xsd create mode 120000 examples/iati_namespaces/iati-organisations-schema.xsd create mode 100644 examples/iati_namespaces/main.csv create mode 100644 examples/iati_namespaces/transactions.csv diff --git a/examples/iati_namespaces/cmd.txt b/examples/iati_namespaces/cmd.txt new file mode 100644 index 00000000..fa3543be --- /dev/null +++ b/examples/iati_namespaces/cmd.txt @@ -0,0 +1 @@ +$ flatten-tool unflatten --xml --id-name iati-identifier --root-list-path iati-activity --xml-schema examples/iati/iati-activities-schema.xsd examples/iati/iati-common.xsd -f csv examples/iati_namespaces diff --git a/examples/iati_namespaces/expected.xml b/examples/iati_namespaces/expected.xml new file mode 100644 index 00000000..ed5b23b4 --- /dev/null +++ b/examples/iati_namespaces/expected.xml @@ -0,0 +1,60 @@ + + + + + AA-AAA-123456789-ABC123 + + Organisation name + + + <narrative>A title</narrative> + + + A description + + + + + + + + + + 10 + + + + + 20 + + one + + + AA-AAA-123456789-ABC124 + + Organisation name + + + <narrative>Another title</narrative> + + + Another description + + + + + + + + + + 30 + + + + + 40 + + two + + diff 
--git a/examples/iati_namespaces/iati-activities-schema.xsd b/examples/iati_namespaces/iati-activities-schema.xsd new file mode 120000 index 00000000..03959eeb --- /dev/null +++ b/examples/iati_namespaces/iati-activities-schema.xsd @@ -0,0 +1 @@ +../iati/iati-activities-schema.xsd \ No newline at end of file diff --git a/examples/iati_namespaces/iati-common.xsd b/examples/iati_namespaces/iati-common.xsd new file mode 120000 index 00000000..e426274d --- /dev/null +++ b/examples/iati_namespaces/iati-common.xsd @@ -0,0 +1 @@ +../iati/iati-common.xsd \ No newline at end of file diff --git a/examples/iati_namespaces/iati-organisations-schema.xsd b/examples/iati_namespaces/iati-organisations-schema.xsd new file mode 120000 index 00000000..923564e3 --- /dev/null +++ b/examples/iati_namespaces/iati-organisations-schema.xsd @@ -0,0 +1 @@ +../iati/iati-organisations-schema.xsd \ No newline at end of file diff --git a/examples/iati_namespaces/main.csv b/examples/iati_namespaces/main.csv new file mode 100644 index 00000000..7c217480 --- /dev/null +++ b/examples/iati_namespaces/main.csv @@ -0,0 +1,3 @@ +iati-identifier,reporting-org/@ref,reporting-org/@type,reporting-org/narrative,participating-org/@role,participating-org/@ref,activity-status/@code,activity-date/@type,activity-date/@iso-date,recipient-country/0/@code,recipient-country/0/@percentage,recipient-country/1/@code,recipient-country/1/@percentage,title/narrative,description/narrative,@last-updated-datetime,@xmlns:customns,customns:test +AA-AAA-123456789-ABC123,AA-AAA-123456789,40,Organisation name,1,AA-AAA-123456789,2,1,2011-10-01,AF,40,XK,60,A title,A description,2011-10-01T00:00:00+00:00,http://example.com/1,one +AA-AAA-123456789-ABC124,AA-AAA-123456789,40,Organisation name,1,AA-AAA-123456789,3,2,2016-01-01,AG,30,XK,70,Another title,Another description,2016-01-01T00:00:00+00:00,http://example.com/2,two diff --git a/examples/iati_namespaces/transactions.csv b/examples/iati_namespaces/transactions.csv new file mode 
100644 index 00000000..b3e47820 --- /dev/null +++ b/examples/iati_namespaces/transactions.csv @@ -0,0 +1,5 @@ +iati-identifier,transaction/0/transaction-type/@code,transaction/0/transaction-date/@iso-date,transaction/0/value/@value-date,transaction/0/value +AA-AAA-123456789-ABC123,2,2012-01-01,2012-01-01,10 +AA-AAA-123456789-ABC123,3,2012-03-03,2012-03-03,20 +AA-AAA-123456789-ABC124,2,2013-04-04,2013-04-04,30 +AA-AAA-123456789-ABC124,3,2013-05-05,2013-05-05,40 diff --git a/flattentool/tests/test_docs.py b/flattentool/tests/test_docs.py index 13793b84..f44cbe87 100644 --- a/flattentool/tests/test_docs.py +++ b/flattentool/tests/test_docs.py @@ -152,7 +152,7 @@ def _test_example_in_doc_worker(root, filename): def test_expected_number_of_examples_in_docs_data(): - expected = 67 + expected = 68 # See _get_examples_in_docs_data() if sys.version_info[:2] != (3, 8): expected -= 3 diff --git a/flattentool/xml_output.py b/flattentool/xml_output.py index 6d18fb0e..de4bd467 100644 --- a/flattentool/xml_output.py +++ b/flattentool/xml_output.py @@ -35,15 +35,16 @@ def sort_attributes(data): return OrderedDict(sorted(attribs) + other) -def child_to_xml(parent_el, tagname, child, toplevel=False, nsmap=None): +def child_to_xml(child_elements, attrib, tagname, child, toplevel=False, nsmap=None): if hasattr(child, "items"): child_el = dict_to_xml(child, tagname, toplevel=False, nsmap=nsmap) if child_el is not None: - parent_el.append(child_el) + child_elements.append(child_el) else: if tagname.startswith("@"): - if USING_LXML and toplevel and tagname.startswith("@xmlns"): - nsmap[tagname[1:].split(":", 1)[1]] = str(child) + if USING_LXML and tagname.startswith("@xmlns"): + nsname = tagname[1:].split(":", 1)[1] + nsmap[nsname] = str(child) return try: attr_name = tagname[1:] @@ -54,41 +55,60 @@ def child_to_xml(parent_el, tagname, child, toplevel=False, nsmap=None): + "}" + attr_name.split(":", 1)[1] ) - parent_el.attrib[attr_name] = str(child) + attrib[attr_name] = str(child) 
except ValueError as e: warn(str(e), DataErrorWarning) elif tagname == "text()": - parent_el.text = str(child) + return str(child) else: raise FlattenToolError("Everything should end with text() or an attribute!") + return None def dict_to_xml(data, tagname, toplevel=True, nsmap=None): - if USING_LXML and ":" in tagname and not toplevel: + if USING_LXML and ":" in tagname and not tagname.startswith("@xmlns"): tagname = ( "{" + nsmap.get(tagname.split(":", 1)[0], "") + "}" + tagname.split(":", 1)[1] ) - try: - if USING_LXML: - el = ET.Element(tagname, nsmap=nsmap) - else: - el = ET.Element(tagname) - except ValueError as e: - warn(str(e), DataErrorWarning) - return if USING_LXML: data = sort_attributes(data) + # Collect children first so every @xmlns is in nsmap before the element is created. + child_elements = [] + attrib = {} + text = None + for k, v in data.items(): if type(v) == list: for item in v: - child_to_xml(el, k, item, nsmap=nsmap) + t = child_to_xml(child_elements, attrib, k, item, nsmap=nsmap) + if t: + text = t + else: + t = child_to_xml( + child_elements, attrib, k, v, toplevel=toplevel, nsmap=nsmap + ) + if t: + text = t + + try: + if USING_LXML: + el = ET.Element(tagname, attrib=attrib, nsmap=nsmap) else: - child_to_xml(el, k, v, toplevel=toplevel, nsmap=nsmap) + el = ET.Element(tagname, attrib=attrib) + except ValueError as e: + warn(str(e), DataErrorWarning) + return + + for child_el in child_elements: + el.append(child_el) + if text: + el.text = text + return el