From 896ffa3c7918901cc41ff32ded48d43c1894edfc Mon Sep 17 00:00:00 2001
From: Harpo Harbert
Date: Thu, 7 Mar 2024 10:47:01 -0800
Subject: [PATCH 1/2] Adds test case for chowda#204

---
 mario/pipelines/trigger.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 mario/pipelines/trigger.py

diff --git a/mario/pipelines/trigger.py b/mario/pipelines/trigger.py
new file mode 100644
index 0000000..57b8d79
--- /dev/null
+++ b/mario/pipelines/trigger.py
@@ -0,0 +1,22 @@
+from metaflow import FlowSpec, Parameter, step, trigger
+
+
+@trigger(event='ampersand')
+class TriggerPipeline(FlowSpec):
+    guid = Parameter('guid', help='GUID of the transcript to process')
+    pipeline = Parameter(
+        'pipeline', help='Testing "&" handling in flow parameters', separator=','
+    )
+
+    @step
+    def start(self):
+        print(f'Checking {self.pipeline} for "&" handling')
+        self.next(self.end)
+
+    @step
+    def end(self):
+        print('Done!')
+
+
+if __name__ == '__main__':
+    TriggerPipeline()

From b00dfcdf46bad4b4cc3a229e196829d81bbad337 Mon Sep 17 00:00:00 2001
From: Harpo Harbert
Date: Wed, 20 Mar 2024 11:29:29 -0700
Subject: [PATCH 2/2] Patches pipeline parsing to unescape `&` characters

---
Review notes (ignored by `git am`):
- clean_pipeline now restores the escapes in a single case-insensitive pass
  and its docstring no longer contains an un-escaped unicode escape.
- The utils.py post-image blob id on the `index` line predates this revision;
  regenerate the patch with `git format-patch` after applying.
- Context-line indentation in the hunks below was reconstructed; verify it
  against the target files before applying.

 mario/pipelines/pipeline.py |  6 +++---
 mario/pipelines/utils.py    | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/mario/pipelines/pipeline.py b/mario/pipelines/pipeline.py
index c17f759..feb9aeb 100644
--- a/mario/pipelines/pipeline.py
+++ b/mario/pipelines/pipeline.py
@@ -53,9 +53,9 @@ def start(self):
     def run_pipeline(self):
         """Run the mmif through a CLAMS pipeline"""
         mmif = self.input_mmif
-        print('starting pipeline')
-        print(self.pipeline)
-        for app in self.pipeline:
+        pipeline = self.clean_pipeline()
+        print('starting pipeline', pipeline)
+        for app in pipeline:
             print(f'Running {app}')
             mmif = self.app(app, mmif)
             print(f'{app} done')
diff --git a/mario/pipelines/utils.py b/mario/pipelines/utils.py
index 12687ea..cbdf310 100644
--- a/mario/pipelines/utils.py
+++ b/mario/pipelines/utils.py
@@ -189,3 +189,32 @@ def cleanup(self) -> None:
             remove(f)
             cleaned += 1
         print(f'Cleaned up {cleaned} files')
+
+    def clean_pipeline(self) -> list:
+        """Return the pipeline apps with JSON HTML-safe escapes restored.
+
+        ArgoEvents parses the pipeline URL string as a body payload parameter,
+        which is written to the Argo-workflow kubernetes resource using
+        `| toJson`. That wraps the GoLang `json.Marshal` function, which
+        escapes `&`, `<`, and `>` characters to HTML-safe unicode, so a `&`
+        shows up as "\\u0026" in the python string.
+
+        Escapes are matched case-insensitively, since JSON allows either case
+        for the hex digits of a `\\uXXXX` escape.
+
+        See https://github.com/WGBH-MLA/chowda/issues/204
+        """
+        import re  # function-scope: the module's imports are outside this hunk
+
+        escapes = {
+            '0026': '&',
+            '0028': '(',
+            '0029': ')',
+            '003c': '<',
+            '003e': '>',
+        }
+        pattern = re.compile(r'\\u(' + '|'.join(escapes) + ')', re.IGNORECASE)
+        return [
+            pattern.sub(lambda match: escapes[match.group(1).lower()], app)
+            for app in self.pipeline
+        ]