Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stetl bgt improvements #69

Merged
merged 8 commits into from
Feb 27, 2018
53 changes: 53 additions & 0 deletions stetl/filters/execfilter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Executes the given command and returns the captured output.
#
# Author: Frank Steggink
#
import subprocess
import os
from stetl.filter import Filter
from stetl.util import Util
from stetl.packet import FORMAT

log = Util.get_log('execfilter')


class ExecFilter(Filter):
    """
    Base class for filters that execute an external command.

    ``invoke`` hands the packet back untouched; ``execute_cmd`` is the helper
    that actually runs a command and returns its captured output.
    """

    def __init__(self, configdict, section, consumes, produces):
        Filter.__init__(self, configdict, section, consumes, produces)

    def invoke(self, packet):
        # Nothing is executed at this level: the packet passes straight through.
        return packet

    def execute_cmd(self, cmd):
        """
        Run ``cmd`` with ``subprocess.check_output`` and return the captured output.

        The command is run through the shell on every platform except
        Windows (``os.name == 'nt'``).
        """
        # NOTE(review): cmd reaches the shell unescaped on non-Windows platforms;
        # confirm that callers only pass trusted command strings.
        run_in_shell = os.name != 'nt'

        log.info("executing cmd=%s" % cmd)
        output = subprocess.check_output(cmd, shell=run_in_shell)
        log.info("execute done")
        return output


class CommandExecFilter(ExecFilter):
    """
    Runs the command held in the packet data and replaces the data with the captured output.

    consumes=FORMAT.string, produces=FORMAT.string
    """

    def __init__(self, configdict, section):
        ExecFilter.__init__(self, configdict, section, consumes=FORMAT.string, produces=FORMAT.string)

    def invoke(self, packet):
        command = packet.data
        if command is None:
            # No command to run: pass the packet along unchanged.
            return packet

        packet.data = self.execute_cmd(command)
        return packet
61 changes: 61 additions & 0 deletions stetl/filters/regexfilter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Useful, but an example would help; it is hard to grasp otherwise. Suggestions:

  • can't regexes be compiled once during init?
  • more uses expected? Maybe a baseclass RegexFilter and subclasses RegexToRecordFilter?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compilation: good point.
More uses: I haven't thought about it yet. It is possible, but at the moment I don't have any other concrete use cases. Looking at the available formats, I think only struct would be a good option. Although formats like geojson_feature, ogr_feature and etree_element could represent the parsed data, they are too specialized. The output of regexfilter, a dictionary, is not something you would typically write directly.

# -*- coding: utf-8 -*-
#
# Extracts data from a string using a regular expression and generates a record.
#
# Author: Frank Steggink

from stetl.component import Config
from stetl.filter import Filter
from stetl.packet import FORMAT
from stetl.util import Util
import re

log = Util.get_log("regexfilter")


class RegexFilter(Filter):
    """
    Extracts data from a string using a regular expression and returns the named groups as a record.

    The pattern is matched from the start of the string (``match`` semantics)
    with the ``re.S`` flag, so ``.`` also matches newlines. When the string
    does not match, an empty record is produced.

    Example: with pattern_string ``(?P<name>[a-z]+)-(?P<nr>[0-9]+)`` the input
    ``abc-12`` becomes the record ``{'name': 'abc', 'nr': '12'}``.

    consumes=FORMAT.string, produces=FORMAT.record
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.

    @Config(ptype=str, default=None, required=True)
    def pattern_string(self):
        """
        Regex pattern string. Should contain named groups.
        """
        pass

    # End attribute config meta

    # Constructor
    def __init__(self, configdict, section, consumes=FORMAT.string, produces=FORMAT.record):
        Filter.__init__(self, configdict, section, consumes, produces)
        # Compiled form of pattern_string; set by init(), or lazily by invoke().
        self.regex = None

    def init(self):
        """
        Validate the configuration and compile the pattern once.

        Raises ValueError when no pattern_string is configured. An invalid
        pattern makes ``re.compile`` fail here rather than on the first packet.
        """
        log.info('Init: regex filter')
        if self.pattern_string is None:
            # If no pattern_string is present:
            err_s = 'The pattern_string needs to be configured'
            log.error(err_s)
            raise ValueError(err_s)

        self.regex = re.compile(self.pattern_string, re.S)

    def exit(self):
        log.info('Exit: regex filter')

    def invoke(self, packet):
        """
        Replace the string in ``packet.data`` with the dict of named groups.

        A packet without data is returned untouched; a string that does not
        match yields an empty dict.
        """
        if packet.data is None:
            return packet

        if self.regex is None:
            # init() was not called before the first packet: compile on demand.
            self.regex = re.compile(self.pattern_string, re.S)

        m = self.regex.match(packet.data)
        packet.data = m.groupdict() if m is not None else {}

        return packet
19 changes: 16 additions & 3 deletions stetl/filters/templatingfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ class StringTemplatingFilter(TemplatingFilter):
consumes=FORMAT.record or FORMAT.record_array, produces=FORMAT.string
"""

@Config(ptype=bool, default=False, required=False)
def safe_substitution(self):
    """
    Apply safe substitution?
    When True, the template is rendered with safe_substitute() instead of
    substitute(): placeholders missing from the record are left intact in the
    resulting string instead of raising an exception. Default: False.
    Copy link
    Member

    Choose a reason for hiding this comment

    The reason will be displayed to describe this comment to others. Learn more.

    Possibly add more comment (I did not know e.g. about this standard option in Python Templates), like
    if placeholders are missing from mapping and keywords, instead of raising an exception, the original placeholder will appear in the resulting string intact.

    Copy link
    Collaborator Author

    Choose a reason for hiding this comment

    The reason will be displayed to describe this comment to others. Learn more.

    Fair point. Usually I don't add comments for things which can be easily looked up.

    """
    pass

def __init__(self, configdict, section):
    # Both a single record and an array of records are accepted as input.
    accepted_formats = [FORMAT.record, FORMAT.record_array]
    TemplatingFilter.__init__(self, configdict, section, consumes=accepted_formats)

Expand All @@ -111,10 +118,16 @@ def create_template(self):
self.template = Template(self.template_string)

def render_template(self, packet):
    """
    Render the template with the record(s) in ``packet.data``.

    A list is rendered item by item into a list of strings; any other value
    is rendered into a single string. When ``safe_substitution`` is set,
    ``safe_substitute`` is used, so missing placeholders stay in the output
    instead of raising ``KeyError``.

    Returns the same packet with ``packet.data`` replaced by the result.
    """
    # Pick the substitution method once instead of repeating the
    # list/non-list branch for each mode.
    if self.safe_substitution:
        substitute = self.template.safe_substitute
    else:
        substitute = self.template.substitute

    if isinstance(packet.data, list):
        packet.data = [substitute(item) for item in packet.data]
    else:
        packet.data = substitute(packet.data)

    return packet

Expand Down
9 changes: 8 additions & 1 deletion stetl/filters/zipfileextractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ def file_path(self):
"""
pass

@Config(ptype=bool, default=True, required=False)
def delete_file(self):
    """
    Delete the file when the chain has been completed?
    If True (the default), after_chain_invoke() removes the current file
    once the chain has run; if False, the file is left on disk.
    """
    pass

# End attribute config meta

# Constructor
Expand Down Expand Up @@ -58,7 +65,7 @@ def invoke(self, packet):

def after_chain_invoke(self, packet):
    """
    Clean up after the chain has processed the current file.

    Removes ``self.cur_file_path`` when ``delete_file`` is enabled and the
    path refers to an existing regular file. Always returns True.
    """
    import os.path

    # Check the cheap config flag first; the filesystem is only touched
    # when deletion is actually requested.
    if self.delete_file and os.path.isfile(self.cur_file_path):
        os.remove(self.cur_file_path)

    return True