Skip to content

Commit

Permalink
#40 add PacketBuffer Filter and Component-instance-lookup in Chain to…
Browse files Browse the repository at this point in the history
… help Unit test result inspection/assertions
  • Loading branch information
justb4 committed Aug 7, 2016
1 parent 3013c33 commit 16e9646
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 0 deletions.
48 changes: 48 additions & 0 deletions stetl/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,54 @@ def add(self, etl_comp):
# Remember current
self.cur_comp = etl_comp

def get_by_class(self, clazz):
    """
    Look up the first Component instance in the Chain whose class equals
    the given class, mainly for testing.
    :param clazz: the class each Component's ``__class__`` is compared against
    :return Component: the first matching Component, or None when none matches
    """
    # Walk the linked Components until one matches or the Chain ends
    candidate = self.first_comp
    while candidate and not (candidate.__class__ == clazz):
        candidate = candidate.next

    return candidate if candidate else None


def get_by_id(self, id):
    """
    Look up a Component instance in the Chain by its id, mainly for testing.
    :param id: the value to compare with each Component's ``get_id()`` result
    :return Component: the first Component with that id, or None when not found
    """
    candidate = self.first_comp
    while True:
        # End of Chain reached (or Chain is empty): nothing found
        if not candidate:
            return None

        if candidate.get_id() == id:
            return candidate

        # No match: move on to the next Component in the Chain
        candidate = candidate.next

def get_by_index(self, index):
    """
    Get Component instance from Chain by position/index in Chain, mainly for testing.
    :param index: zero-based position of the Component in the Chain
    :return Component: the Component at that position, or None when the index
        is negative or lies beyond the end of the Chain
    """
    # A negative index would skip the loop below and wrongly yield the first
    # Component; treat it like any other out-of-range index.
    if index < 0:
        return None

    cur_comp = self.first_comp
    i = 0
    while cur_comp and i < index:
        # Step to the next Component in the Chain
        cur_comp = cur_comp.next
        i += 1

    return cur_comp

def run(self):
"""
Run the ETL Chain.
Expand Down
5 changes: 5 additions & 0 deletions stetl/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def __init__(self, configdict, section, consumes=FORMAT.none, produces=FORMAT.no
# The actual typed values as populated within Config Decorator
self.cfg_vals = dict()
self.next = None
self.section = section

# First assume single output provided by derived class
self._output_format = produces
Expand Down Expand Up @@ -142,6 +143,10 @@ def add_next(self, next_component):
raise ValueError(
'Incompatible components are linked: %s and %s' % (str(self), str(self.next)))

def get_id(self):
    """
    Get the id of this Component: currently the [section] name.
    :return: the value stored in ``self.section``
    """
    component_id = self.section
    return component_id

# Check our compatibility with the next Component in the Chain
def is_compatible(self):

Expand Down
27 changes: 27 additions & 0 deletions stetl/filters/packetbuffer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
#
# Packet buffering.
#
# Author:Just van den Broecke

import copy
from stetl.util import Util
from stetl.filter import Filter
from stetl.packet import FORMAT

# Module-level log object for this filter, obtained via Util.get_log
log = Util.get_log("packetbuffer")

class PacketBuffer(Filter):
    """
    Pass-through Filter that keeps a copy of every incoming Packet.

    Main use is unit-testing: once the ETL is done, the collected Packets
    can be inspected through the ``packet_list`` attribute.
    """

    def __init__(self, configdict, section):
        # Declared to consume and produce any format
        Filter.__init__(self, configdict, section, consumes=FORMAT.any, produces=FORMAT.any)

        # Copies of the received Packets, in order of arrival
        self.packet_list = list()

    def invoke(self, packet):
        # Keep a copy instead of the Packet itself, as Packets may be cleared/reused.
        # Note: copy.copy() makes a shallow copy of the Packet object.
        snapshot = copy.copy(packet)
        self.packet_list.append(snapshot)

        # Hand the original Packet on unchanged
        return packet
39 changes: 39 additions & 0 deletions tests/filters/configs/xmlassembler.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Config file for unit testing XmlAssembler.

[etl]
# chains = input_glob_file|parse_xml_file_filter|xml_assembler|output_std
chains = parse_xml_file_input|xml_assembler|packet_buffer|output_std

[input_glob_file]
class = inputs.fileinput.GlobFileInput
file_path = tests/data/dummy.gml

# The source input file producing XML elements
[parse_xml_file_filter]
class = filters.xmlelementreader.XmlElementReader
element_tags = FeatureMember

[parse_xml_file_input]
class = inputs.fileinput.XmlElementStreamerFileInput
element_tags = FeatureMember
file_path = tests/data/dummy.gml

# Assembles etree docs from gml:featureMember elements, each doc holding at most "max_elements" elements
[xml_assembler]
class = filters.xmlassembler.XmlAssembler
max_elements = 2
container_doc = <?xml version="1.0" encoding="UTF-8"?>
<gml:FeatureCollectionT10NL
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:top10nl="http://www.kadaster.nl/schemas/imbrt/top10nl/1.2"
xmlns:brt="http://www.kadaster.nl/schemas/imbrt/brt-alg/1.0"
xmlns:gml="http://www.opengis.net/gml/3.2"
xsi:schemaLocation="http://www.kadaster.nl/schemas/imbrt/top10nl/1.2 http://www.kadaster.nl/schemas/top10nl/vyyyymmdd/TOP10NL_1_2.xsd">
</gml:FeatureCollectionT10NL >
element_container_tag = FeatureCollectionT10NL

[packet_buffer]
class = filters.packetbuffer.PacketBuffer

[output_std]
class = outputs.standardoutput.StandardOutput
66 changes: 66 additions & 0 deletions tests/filters/test_xml_assembler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
#import re
import sys

from stetl.etl import ETL
from stetl.filters.xmlassembler import XmlAssembler
from stetl.filters.packetbuffer import PacketBuffer
from tests.stetl_test_case import StetlTestCase

class XmlAssemblerTest(StetlTestCase):
    """Unit tests for XmlAssembler, driven by configs/xmlassembler.cfg"""

    def setUp(self):
        super(XmlAssemblerTest, self).setUp()

        # Initialize Stetl with the config file located next to this test module
        curr_dir = os.path.dirname(os.path.realpath(__file__))
        cfg_dict = {'config_file': os.path.join(curr_dir, 'configs/xmlassembler.cfg')}
        self.etl = ETL(cfg_dict)

    def test_class(self):
        # The second Component (index 1) in the Chain must be configured as XmlAssembler
        chain = StetlTestCase.get_chain(self.etl)
        section = StetlTestCase.get_section(chain, 1)
        class_name = self.etl.configdict.get(section, 'class')

        self.assertEqual('filters.xmlassembler.XmlAssembler', class_name)

    def test_instance(self):
        chain = StetlTestCase.get_chain(self.etl)

        # assertIsInstance reports the actual type on failure
        self.assertIsInstance(chain.get_by_index(1), XmlAssembler)

    def test_execute(self):
        chain = StetlTestCase.get_chain(self.etl)
        chain.run()

        # The PacketBuffer in the Chain holds copies of all Packets that passed through it
        buffer_filter = chain.get_by_class(PacketBuffer)
        self.assertIsNotNone(buffer_filter, msg='no PacketBuffer found in Chain')
        packet_list = buffer_filter.packet_list

        # most Packets are empty, but we need to find 2 filled with etree docs
        doc_packet_list = [packet for packet in packet_list if packet.data]

        # Assertion: we need to see 2 documents
        self.assertEqual(len(doc_packet_list), 2)
        namespaces = {
            'gml': 'http://www.opengis.net/gml/3.2',
            'top10nl': 'http://register.geostandaarden.nl/gmlapplicatieschema/top10nl/1.2.0',
        }

        # Assertion: first doc has two FeatureMember elements with proper Namespaces
        xml_doc1 = doc_packet_list[0].data
        feature_elms = xml_doc1.xpath('/gml:FeatureCollectionT10NL/top10nl:FeatureMember', namespaces=namespaces)
        self.assertEqual(len(feature_elms), 2)

        # Assertion: second doc has one FeatureMember with proper Namespaces
        xml_doc2 = doc_packet_list[1].data
        feature_elms = xml_doc2.xpath('/gml:FeatureCollectionT10NL/top10nl:FeatureMember', namespaces=namespaces)
        self.assertEqual(len(feature_elms), 1)

        # Assertion: first doc has end_of_doc but not end_of_stream set
        self.assertTrue(doc_packet_list[0].end_of_doc, msg='doc1: end_of_doc is False')
        self.assertFalse(doc_packet_list[0].end_of_stream, msg='doc1: end_of_stream is True')

        # Assertion: second doc has end_of_doc and end_of_stream set
        self.assertTrue(doc_packet_list[1].end_of_doc, msg='doc2: end_of_doc is False')
        self.assertTrue(doc_packet_list[1].end_of_stream, msg='doc2: end_of_stream is False')

0 comments on commit 16e9646

Please sign in to comment.