-
Notifications
You must be signed in to change notification settings - Fork 35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Stetl bgt improvements #69
Changes from 5 commits
030521c
c6f5e5a
2c06228
bf020b4
a5bfef6
4d75bba
3db9655
07670a6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Executes the given command and returns the captured output. | ||
# | ||
# Author: Frank Steggink | ||
# | ||
import subprocess | ||
import os | ||
from stetl.filter import Filter | ||
from stetl.util import Util | ||
from stetl.packet import FORMAT | ||
|
||
log = Util.get_log('execfilter') | ||
|
||
|
||
class ExecFilter(Filter):
    """
    Abstract base class for filters that run an external command.

    The class itself passes packets through unchanged; subclasses are
    expected to call execute_cmd() from their own invoke().
    """

    def __init__(self, configdict, section, consumes, produces):
        Filter.__init__(self, configdict, section, consumes, produces)

    def invoke(self, packet):
        # Nothing to do at this level: hand the packet on untouched.
        return packet

    def execute_cmd(self, cmd):
        """
        Run cmd with subprocess.check_output() and return the captured output.

        The command is handed to the shell on every platform except
        Windows (os.name == 'nt'), where it is started directly.
        """
        run_in_shell = os.name != 'nt'

        log.info("executing cmd=%s" % cmd)
        captured = subprocess.check_output(cmd, shell=run_in_shell)
        log.info("execute done")
        return captured
|
||
|
||
class CommandExecFilter(ExecFilter):
    """
    Runs the command held in the packet data and replaces the data with
    the captured output of that command.

    consumes=FORMAT.string, produces=FORMAT.string
    """

    def __init__(self, configdict, section):
        ExecFilter.__init__(self, configdict, section, consumes=FORMAT.string, produces=FORMAT.string)

    def invoke(self, packet):
        command = packet.data
        if command is None:
            # No command in this packet: pass it on as-is.
            return packet

        packet.data = self.execute_cmd(command)
        return packet
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Extracts data from a string using a regular expression and generates a record. | ||
# | ||
# Author: Frank Steggink | ||
|
||
from stetl.component import Config | ||
from stetl.filter import Filter | ||
from stetl.packet import FORMAT | ||
from stetl.util import Util | ||
import re | ||
|
||
log = Util.get_log("regexfilter") | ||
|
||
|
||
class RegexFilter(Filter):
    """
    Extracts data from a string using a regular expression and returns the named groups as a record.

    The pattern is matched at the start of the incoming string (re.match)
    with the re.S flag, so '.' also matches newlines. The record is the
    dictionary of named groups; when the pattern does not match, the
    record is an empty dictionary.

    Example: with pattern_string '(?P<key>[a-z]+)=(?P<value>[0-9]+)' the
    input 'width=42' produces {'key': 'width', 'value': '42'}.

    consumes=FORMAT.string, produces=FORMAT.record
    """

    # Start attribute config meta
    # Applying Decorator pattern with the Config class to provide
    # read-only config values from the configured properties.

    @Config(ptype=str, default=None, required=True)
    def pattern_string(self):
        """
        Regex pattern string. Should contain named groups, i.e.
        (?P<name>...), as only named groups end up in the record.
        """
        pass

    # End attribute config meta

    # Constructor
    def __init__(self, configdict, section, consumes=FORMAT.string, produces=FORMAT.record):
        Filter.__init__(self, configdict, section, consumes, produces)

    def _compiled_pattern(self):
        # Compile the configured pattern once and reuse it for every packet.
        regex = getattr(self, '_regex', None)
        if regex is None:
            regex = re.compile(self.pattern_string, re.S)
            self._regex = regex
        return regex

    def init(self):
        log.info('Init: regex filter')
        if self.pattern_string is None:
            # If no pattern_string is present:
            err_s = 'The pattern_string needs to be configured'
            log.error(err_s)
            raise ValueError(err_s)

        # Compile up front so that an invalid pattern is reported at
        # initialisation time instead of on the first packet.
        self._compiled_pattern()

    def exit(self):
        log.info('Exit: regex filter')

    def invoke(self, packet):
        if packet.data is None:
            return packet

        # Falls back to compiling lazily when init() has not been called.
        m = self._compiled_pattern().match(packet.data)
        if m is not None:
            packet.data = m.groupdict()
        else:
            packet.data = {}

        return packet
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,6 +94,13 @@ class StringTemplatingFilter(TemplatingFilter): | |
consumes=FORMAT.record or FORMAT.record_array, produces=FORMAT.string | ||
""" | ||
|
||
@Config(ptype=bool, default=False, required=False) | ||
def safe_substitution(self): | ||
""" | ||
Apply safe substitution? | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Possibly add more comment here (I did not know, e.g., about this standard option in Python Templates). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fair point. Usually I don't add comments for things which can be easily looked up. |
||
""" | ||
pass | ||
|
||
def __init__(self, configdict, section): | ||
TemplatingFilter.__init__(self, configdict, section, consumes=[FORMAT.record, FORMAT.record_array]) | ||
|
||
|
@@ -111,10 +118,16 @@ def create_template(self): | |
self.template = Template(self.template_string) | ||
|
||
def render_template(self, packet):
    """
    Render the packet data through the string template.

    packet.data is either a single mapping or a list of mappings; a list
    is rendered item by item into a list of strings. When
    safe_substitution is enabled Template.safe_substitute() is used, which
    leaves unknown placeholders in place; otherwise Template.substitute()
    is used, which raises KeyError for a missing placeholder value.

    Returns the packet with its data replaced by the rendered result.
    """
    # Pick the substitution method once so that the list/single-item
    # handling does not have to be duplicated per mode.
    if self.safe_substitution:
        substitute = self.template.safe_substitute
    else:
        substitute = self.template.substitute

    if type(packet.data) is list:
        packet.data = [substitute(item) for item in packet.data]
    else:
        packet.data = substitute(packet.data)

    return packet
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Useful, an example will help, hard to grasp otherwise. Suggestions:
RegexFilter and subclasses RegexToRecordFilter?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Compilation: good point.
More uses: I haven't thought about it yet. It is possible, but at the moment I don't have any other concrete use cases yet. When looking at the possible formats, I think only struct will be a good option. Although formats like geojson_feature, ogr_feature and etree_element could represent the parsed data, they are too specialized. The output of regexfilter, a dictionary, is not something you would typically write directly.