diff --git a/classes/local/step/compression_trait.php b/classes/local/step/compression_trait.php
new file mode 100644
index 00000000..eade8a62
--- /dev/null
+++ b/classes/local/step/compression_trait.php
@@ -0,0 +1,287 @@
+.
+
+namespace tool_dataflows\local\step;
+
+use coding_exception;
+use tool_dataflows\helper;
+
+/**
+ * File compression trait
+ *
+ * @package tool_dataflows
+ * @author Matthew Hilton
+ * @copyright Catalyst IT, 2022
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+trait compression_trait {
+    /**
+     * Returns whether or not the step configured, has a side effect.
+     *
+     * Once a step definition is set, the step only counts as side-effect free when the
+     * destination path is relative according to helper::path_is_relative(). Without a
+     * definition a side effect is assumed.
+     *
+     * @return bool whether or not this step has a side effect
+     */
+    public function has_side_effect(): bool {
+        if (isset($this->stepdef)) {
+            $config = $this->get_variables()->get('config');
+            return !helper::path_is_relative($config->to);
+        }
+        return true;
+    }
+
+    /**
+     * Return the definition of the fields available in this form.
+     *
+     * @return array
+     */
+    public static function form_define_fields(): array {
+        return [
+            'command' => ['type' => PARAM_TEXT, 'required' => true],
+            'method' => ['type' => PARAM_TEXT, 'required' => true],
+            'from' => ['type' => PARAM_TEXT, 'required' => true],
+            'to' => ['type' => PARAM_TEXT, 'required' => true],
+        ];
+    }
+
+    /**
+     * Custom elements for editing the connector.
+     *
+     * @param \MoodleQuickForm $mform
+     */
+    public function form_add_custom_inputs(\MoodleQuickForm &$mform) {
+
+        $mform->addElement('select', 'config_command', get_string('compression:command', 'tool_dataflows'), [
+            'compress' => get_string('compression:compress', 'tool_dataflows'),
+            'decompress' => get_string('compression:decompress', 'tool_dataflows'),
+        ]);
+
+        // Build the selector options using the supported methods as the source.
+        $supportedmethods = $this->get_supported_methods();
+        $methodkeys = array_keys($supportedmethods);
+        $methodvalues = array_column($supportedmethods, 'name');
+        $methodoptions = array_combine($methodkeys, $methodvalues);
+
+        $mform->addElement('select', 'config_method', get_string('compression:method', 'tool_dataflows'), $methodoptions);
+
+        // From / Source path.
+        $mform->addElement('text', 'config_from', get_string('compression:from', 'tool_dataflows'));
+        $mform->addRule('config_from', get_string('required'), 'required', null, 'client');
+
+        // To / Target path.
+        $mform->addElement('text', 'config_to', get_string('compression:to', 'tool_dataflows'));
+        $mform->addRule('config_to', get_string('required'), 'required', null, 'client');
+    }
+
+    /**
+     * Executes the step
+     *
+     * Resolves both configured paths and checks that the source file exists. Unless this is
+     * a dry run of a step with side effects, the selected method is then run and its outcome
+     * stored in vars.success. The input is always returned unchanged.
+     *
+     * @param mixed|null $input
+     * @return mixed
+     */
+    public function execute($input = null) {
+        $variables = $this->get_variables();
+        $config = $variables->get('config');
+        $config->from = $this->enginestep->engine->resolve_path($config->from);
+        $config->to = $this->enginestep->engine->resolve_path($config->to);
+
+        // Check that the from path exists.
+        if (!is_file($config->from)) {
+            $this->enginestep->log($config->from . ' file does not exist');
+            $variables->set('vars.success', false);
+            return $input;
+        }
+
+        // We do not need to go any further if it is a dry run.
+        if ($this->is_dry_run() && $this->has_side_effect()) {
+            return $input;
+        }
+
+        $result = $this->execute_method($config);
+
+        if ($result !== true) {
+            // Log the error.
+            $this->enginestep->log($result);
+        }
+
+        $variables->set('vars.success', $result === true);
+
+        return $input;
+    }
+
+    /**
+     * Perform any extra validation that is required only for runs.
+     *
+     * @return true|array Will return true or an array of errors.
+     */
+    public function validate_for_run() {
+        $config = $this->get_variables()->get('config');
+
+        $errors = [];
+
+        $error = helper::path_validate($config->from);
+        if ($error !== true) {
+            $errors['config_from'] = $error;
+        }
+
+        $error = helper::path_validate($config->to);
+        if ($error !== true) {
+            $errors['config_to'] = $error;
+        }
+
+        // Validate that the chosen method's executable is actually executable.
+        $method = $this->get_method($config->method);
+        $error = ($method->isexecutable)();
+        if ($error !== true) {
+            $errors['config_method'] = $error;
+        }
+
+        return $errors ?: true;
+    }
+
+    /**
+     * A list of outputs and their description if applicable.
+     *
+     * @return array of outputs
+     */
+    public function define_outputs(): array {
+        return ['success' => get_string('compression:output_success', 'tool_dataflows')];
+    }
+
+    /**
+     * Returns the method info that has been selected in the configuration.
+     * @param string $method method name
+     * @return object method information (name, path, etc...)
+     */
+    private function get_method(string $method): object {
+        $methods = $this->get_supported_methods();
+
+        // If not defined it means something has gone very wrong.
+        // this should almost always be defined.
+        if (!isset($methods[$method])) {
+            throw new coding_exception($method . ' is not defined as a supported method.');
+        }
+
+        return $methods[$method];
+    }
+
+    /**
+     * Returns an array of supported methods by this step and information about them.
+     * @return array array of objects containing the information about each method
+     */
+    private function get_supported_methods(): array {
+        return [
+            'gzip' => (object) [
+                'name' => get_string('compression:method:gzip', 'tool_dataflows'),
+                'isexecutable' => function() {
+                    return self::validate_executable(get_config('tool_dataflows', 'gzip_exec_path'));
+                }
+            ]
+        ];
+    }
+
+    /**
+     * Validates the executable
+     * @param string $path path to executable.
+     * @return string|true string if error, else true if valid.
+     */
+    private static function validate_executable(string $path) {
+        if (!is_executable($path)) {
+            return get_string('compression:error:invalidexecutable', 'tool_dataflows', [
+                'path' => $path
+            ]);
+        }
+
+        return true;
+    }
+
+    /**
+     * Executes the configured method.
+     *
+     * @param object $config step configuration
+     * @return string|true string if error, else true if success
+     */
+    private function execute_method($config) {
+        switch ($config->method) {
+            case 'gzip':
+                return $this->execute_gzip($config);
+            default:
+                throw new coding_exception($config->method . ' has no executable setup.');
+        }
+    }
+
+    /**
+     * Executes the gzip method.
+     *
+     * gzip writes its output beside the source file, so the result is moved to the
+     * configured destination afterwards. The source file is kept (-k).
+     *
+     * @param object $config step configuration (command, from and to are used)
+     * @return string|true string if error, else true if success.
+     */
+    private function execute_gzip($config) {
+        // Quote the executable too, so a path containing spaces cannot split the command.
+        $gzip = escapeshellarg((string) get_config('tool_dataflows', 'gzip_exec_path'));
+        $from = escapeshellarg($config->from);
+
+        // Decide the direction once so the gzip flag and the expected output name always agree.
+        $decompress = $config->command == 'decompress';
+        $compressionmode = $decompress ? '-d' : '';
+
+        // gzip names its output itself: the source name plus ".gz" when compressing, the source name
+        // minus its ".gz" suffix when decompressing. Only that exact suffix is removed; rtrim() treats
+        // '.gz' as a character list and would also strip trailing '.', 'g' and 'z' from the name itself.
+        $movefilename = $decompress ? preg_replace('/\.gz\z/', '', $config->from) : $config->from . '.gz';
+
+        // See https://www.gnu.org/software/gzip/manual/html_node/Invoking-gzip.html.
+        // -f: force override destination file if it exists
+        // -v: verbose
+        // -k: keep input file
+        // 2>&1: pipe stderror to stdout.
+        $gzipcommand = "{$gzip} -f -v -k {$compressionmode} {$from} 2>&1";
+
+        // Skip the move when gzip already wrote to the destination: GNU mv refuses to move a file
+        // onto itself and exits non-zero, which would report a successful run as failed.
+        if ($movefilename !== $config->to) {
+            $gzipcommand .= ' && mv ' . escapeshellarg($movefilename) . ' ' . escapeshellarg($config->to) . ' 2>&1';
+        }
+        $this->enginestep->log("Command: " . $gzipcommand);
+
+        // Execute the gzip command.
+        $output = [];
+        $result = null;
+        exec($gzipcommand, $output, $result);
+        $success = $result === 0;
+        $this->enginestep->log(
+            $success
+            ? 'Success'
+            : 'Fail' . PHP_EOL . implode(PHP_EOL, $output)
+        );
+
+        // Emit in error logs.
+        if (!$success) {
+            return implode(PHP_EOL, $output);
+        }
+
+        return true;
+    }
+}
diff --git a/classes/local/step/connector_compression.php b/classes/local/step/connector_compression.php
new file mode 100644
index 00000000..d696fa5d
--- /dev/null
+++ b/classes/local/step/connector_compression.php
@@ -0,0 +1,35 @@
+.
+
+namespace tool_dataflows\local\step;
+
+/**
+ * Connector step for file compression/decompression
+ *
+ * @package tool_dataflows
+ * @author Matthew Hilton
+ * @copyright 2023, Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+class connector_compression extends connector_step {
+    use compression_trait;
+
+    /** @var int[] number of output flows (min, max). */
+    protected $outputflows = [0, 1];
+
+    /** @var int[] number of output connectors (min, max). */
+    protected $outputconnectors = [0, 1];
+}
diff --git a/classes/local/step/flow_compression.php b/classes/local/step/flow_compression.php
new file mode 100644
index 00000000..cb17f636
--- /dev/null
+++ b/classes/local/step/flow_compression.php
@@ -0,0 +1,35 @@
+.
+
+namespace tool_dataflows\local\step;
+
+/**
+ * Flow step for compression
+ *
+ * @package tool_dataflows
+ * @author Matthew Hilton
+ * @copyright 2023, Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+class flow_compression extends flow_step {
+    use compression_trait;
+
+    /** @var int[] number of output flows (min, max). */
+    protected $outputflows = [0, 1];
+
+    /** @var int[] number of output connectors (min, max). */
+    protected $outputconnectors = [0, 1];
+}
diff --git a/lang/en/tool_dataflows.php b/lang/en/tool_dataflows.php
index e6d73b2e..df79eaf1 100644
--- a/lang/en/tool_dataflows.php
+++ b/lang/en/tool_dataflows.php
@@ -41,6 +41,8 @@
 def: 12 # Accessed as \${{global.vars.abc.def}}";
 $string['gpg_exec_path'] = 'Path to GPG executable';
 $string['gpg_exec_path_desc'] = 'Path to GPG executable';
 $string['gpg_key_dir'] = 'Path to keyring directory';
 $string['gpg_key_dir_desc'] = 'Path to keyring directory';
+$string['gzip_exec_path'] = 'Path to gzip executable';
+$string['gzip_exec_path_desc'] = 'Path to gzip executable';
 $string['log_handlers'] = 'Log handlers';
@@ -129,6 +131,7 @@
 $string['step_name_connector_sftp_directory_file_list'] = 'SFTP directory file list';
 $string['step_name_connector_abort'] = 'Abort connector';
 $string['step_name_connector_append_file'] = 'Append file';
+$string['step_name_connector_compression'] = 'Compression/Decompression';
 $string['step_name_connector_copy_file'] = 'Copy File';
 $string['step_name_connector_curl'] = 'Curl connector';
 $string['step_name_connector_debug_file_display'] = 'File contents display';
@@ -147,6 +150,7 @@
 $string['step_name_connector_wait'] = 'Wait';
 $string['step_name_flow_abort'] = 'Abort';
 $string['step_name_flow_append_file'] = 'Append';
+$string['step_name_flow_compression'] = 'Compression/decompression';
 $string['step_name_flow_copy_file'] = 'Copy File';
 $string['step_name_flow_curl'] = 'Curl';
 $string['step_name_flow_email'] = 'Flow email notification';
@@ -599,3 +603,14 @@
 
 // Remove file step.
 $string['remove_file:file'] = 'File path to be removed';
+
+// Compression step.
+$string['compression:compress'] = 'Compress';
+$string['compression:decompress'] = 'Decompress';
+$string['compression:command'] = 'Command';
+$string['compression:method'] = 'Method';
+$string['compression:method:gzip'] = 'gzip';
+$string['compression:from'] = 'Source';
+$string['compression:to'] = 'Destination';
+$string['compression:output_success'] = 'Success';
+$string['compression:error:invalidexecutable'] = '{$a->path} is not executable';
diff --git a/lib.php b/lib.php
index 546208a7..f63625dd 100644
--- a/lib.php
+++ b/lib.php
@@ -51,6 +51,7 @@ function tool_dataflows_step_types() {
         new step\connector_abort,
         new step\connector_append_file,
         new step\connector_curl,
+        new step\connector_compression,
         new step\connector_copy_file,
         new step\connector_debug_file_display,
         new step\connector_debugging,
@@ -71,6 +72,7 @@ function tool_dataflows_step_types() {
         new step\connector_wait,
         new step\flow_abort,
         new step\flow_append_file,
+        new step\flow_compression,
         new step\flow_copy_file,
         new step\flow_curl,
         new step\flow_email,
diff --git a/settings.php b/settings.php
index dc5f8a92..e2bce68c 100644
--- a/settings.php
+++ b/settings.php
@@ -128,6 +128,15 @@
         )
     );
 
+    $settings->add(
+        new admin_setting_configexecutable(
+            'tool_dataflows/gzip_exec_path',
+            get_string('gzip_exec_path', 'tool_dataflows'),
+            get_string('gzip_exec_path_desc', 'tool_dataflows'),
+            '/usr/bin/gzip'
+        )
+    );
+
     $settings->add(
         new admin_setting_configtext(
             'tool_dataflows/gpg_key_dir',
diff --git a/tests/tool_dataflows_connector_compression_test.php b/tests/tool_dataflows_connector_compression_test.php
new file mode 100644
index 00000000..abb4cc82
--- /dev/null
+++ b/tests/tool_dataflows_connector_compression_test.php
@@ -0,0 +1,161 @@
+.
+
+namespace tool_dataflows;
+
+use Symfony\Component\Yaml\Yaml;
+use tool_dataflows\local\execution\engine;
+use tool_dataflows\local\step\connector_compression;
+
+/**
+ * Unit test for the compression connector step.
+ *
+ * @package tool_dataflows
+ * @author Matthew Hilton
+ * @copyright 2023, Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ * @covers \tool_dataflows\local\step\connector_compression
+ */
+class tool_dataflows_connector_compression_test extends \advanced_testcase {
+    /** @var string $basedir base test directory for files **/
+    private $basedir;
+
+    /**
+     * Sets up tests
+     */
+    protected function setUp(): void {
+        parent::setUp();
+        $this->resetAfterTest();
+
+        $this->basedir = make_unique_writable_directory(make_temp_directory('tool_dataflows'));
+        set_config('permitted_dirs', $this->basedir, 'tool_dataflows');
+    }
+
+    /**
+     * Creates a test dataflow
+     * @param string $from from file
+     * @param string $tocompressed the destination that $from gets compressed to
+     * @param string $todecompressed the destination that $tocompressed gets decompressed to
+     * @param string $method
+     * @return dataflow the dataflow with the compress and decompress steps added
+     */
+    private function create_test_dataflow(string $from, string $tocompressed, string $todecompressed, string $method) {
+        $dataflow = new dataflow();
+        $dataflow->name = 'compression-connector-test';
+        $dataflow->save();
+
+        $compress = new step();
+        $compress->config = Yaml::dump([
+            'from' => $from,
+            'to' => $tocompressed,
+            'method' => $method,
+            'command' => 'compress'
+        ]);
+
+        $compress->name = 'compress';
+        $compress->type = connector_compression::class;
+
+        $dataflow->add_step($compress);
+
+        $decompress = new step();
+        $decompress->config = Yaml::dump([
+            'from' => $tocompressed,
+            'to' => $todecompressed,
+            'method' => $method,
+            'command' => 'decompress'
+        ]);
+
+        // The decompress step reads what the compress step writes, so it must run after it.
+        $decompress->depends_on([$compress]);
+        $decompress->name = 'decompress';
+        $decompress->type = connector_compression::class;
+
+        $dataflow->add_step($decompress);
+        return $dataflow;
+    }
+
+    /**
+     * Test compression
+     */
+    public function test_gzip_compression_decompression() {
+        // Skip when the configured gzip binary is missing (get_config() takes the plugin name first).
+        if (!is_executable((string) get_config('tool_dataflows', 'gzip_exec_path'))) {
+            $this->markTestSkipped('gzip is not installed');
+        }
+
+        $from = $this->basedir . '/input.txt';
+        $tocompressed = $this->basedir . '/output.txt.gz';
+        $todecompressed = $this->basedir . '/output_data.txt';
+
+        $datatowrite = 'testdata';
+        file_put_contents($from, $datatowrite);
+
+        // Input should exist (we just wrote to it), but the output should NOT exist yet.
+        $this->assertTrue(is_file($from));
+        $this->assertFalse(is_file($tocompressed));
+        $this->assertFalse(is_file($todecompressed));
+
+        $dataflow = $this->create_test_dataflow($from, $tocompressed, $todecompressed, 'gzip');
+
+        ob_start();
+        $engine = new engine($dataflow, false, false);
+        $engine->execute();
+        ob_get_clean();
+
+        // Check that the compressed file exists and also that the original file was left intact.
+        $this->assertTrue(is_file($from));
+        $this->assertTrue(is_file($tocompressed));
+        $this->assertTrue(is_file($todecompressed));
+
+        // Check that the originally written data ended up the same in the decompressed file.
+        $decompresseddata = file_get_contents($todecompressed);
+        $this->assertEquals($datatowrite, $decompresseddata);
+
+        $vars = $engine->get_variables_root()->get('steps.compress.vars');
+        $this->assertTrue($vars->success);
+
+        $vars = $engine->get_variables_root()->get('steps.decompress.vars');
+        $this->assertTrue($vars->success);
+    }
+
+    /**
+     * Tests gzip validation
+     */
+    public function test_gzip_validation() {
+        // Skip when the configured gzip binary is missing (get_config() takes the plugin name first).
+        if (!is_executable((string) get_config('tool_dataflows', 'gzip_exec_path'))) {
+            $this->markTestSkipped('gzip is not installed');
+        }
+
+        $from = $this->basedir . '/input.txt';
+        $to = $this->basedir . '/output.txt.gz';
+        $todecompressed = $this->basedir . '/output_new.txt';
+        $dataflow = $this->create_test_dataflow($from, $to, $todecompressed, 'gzip');
+        $step = $dataflow->get_steps()->compress;
+
+        // Initially the default gzip should be executable.
+        // Which means the step is ready to run.
+        $this->assertTrue($step->steptype->validate_for_run());
+
+        // Break the gzip config.
+        set_config('gzip_exec_path', '/not/a/real/path', 'tool_dataflows');
+        $validation = $step->steptype->validate_for_run();
+
+        $this->assertIsArray($validation);
+        $this->assertArrayHasKey('config_method', $validation);
+    }
+}
+
diff --git a/tests/tool_dataflows_flow_compression_test.php b/tests/tool_dataflows_flow_compression_test.php
new file mode 100644
index 00000000..8825190b
--- /dev/null
+++ b/tests/tool_dataflows_flow_compression_test.php
@@ -0,0 +1,146 @@
+.
+
+namespace tool_dataflows;
+
+use Symfony\Component\Yaml\Yaml;
+use tool_dataflows\local\execution\engine;
+use tool_dataflows\local\step\flow_compression;
+use tool_dataflows\local\step\reader_directory_file_list;
+
+/**
+ * Unit test for the compression flow step.
+ *
+ * @package tool_dataflows
+ * @author Matthew Hilton
+ * @copyright 2023, Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ * @covers \tool_dataflows\local\step\flow_compression
+ */
+class tool_dataflows_flow_compression_test extends \advanced_testcase {
+    /** @var string $readdir directory with files in it to read from for flow step */
+    private $readdir;
+
+    /** @var string $outdir directory where the compressed/decompressed files are outputted to */
+    private $outdir;
+
+    /**
+     * Sets up tests
+     */
+    protected function setUp(): void {
+        parent::setUp();
+        $this->resetAfterTest();
+
+        $basedir = make_unique_writable_directory(make_temp_directory('tool_dataflows'));
+        $this->readdir = $basedir . '/filesin';
+        $this->outdir = $basedir . '/filesout';
+
+        mkdir($this->readdir);
+        mkdir($this->outdir);
+
+        // Generate some files to read from in the read directory.
+        file_put_contents($this->readdir . '/test1.txt', 'test1234');
+        file_put_contents($this->readdir . '/test2.txt', 'test1234');
+
+        set_config('permitted_dirs', $basedir, 'tool_dataflows');
+    }
+
+    /**
+     * Creates a test dataflow: directory file list reader, then compress, then decompress
+     * @param string $method
+     */
+    private function create_test_dataflow(string $method) {
+        $dataflow = new dataflow();
+        $dataflow->name = 'compression-connector-test';
+        $dataflow->save();
+
+        $reader = new step();
+        $reader->config = Yaml::dump([
+            'directory' => $this->readdir,
+            'pattern' => "*",
+            'returnvalue' => 'basename',
+            'sort' => 'alpha',
+            'offset' => '0',
+            'limit' => '0'
+        ]);
+        $reader->name = 'reader';
+        $reader->type = reader_directory_file_list::class;
+        $dataflow->add_step($reader);
+
+        $compress = new step();
+        $compress->config = Yaml::dump([
+            'from' => $this->readdir . '/${{record.filename}}',
+            'to' => $this->outdir . '/${{record.filename}}-out.gz',
+            'method' => $method,
+            'command' => 'compress'
+        ]);
+        $compress->depends_on([$reader]);
+        $compress->name = 'compress';
+        $compress->type = flow_compression::class;
+        $dataflow->add_step($compress);
+
+        $decompress = new step();
+        $decompress->config = Yaml::dump([
+            'from' => $this->outdir . '/${{record.filename}}-out.gz',
+            'to' => $this->outdir . '/${{record.filename}}-out-decompressed.txt',
+            'method' => $method,
+            'command' => 'decompress'
+        ]);
+        $decompress->depends_on([$compress]);
+        $decompress->name = 'decompress';
+        $decompress->type = flow_compression::class;
+        $dataflow->add_step($decompress);
+
+        return $dataflow;
+    }
+
+    /**
+     * Test compression
+     */
+    public function test_gzip_compression_decompression() {
+        // Skip when the configured gzip binary is missing (get_config() takes the plugin name first).
+        if (!is_executable((string) get_config('tool_dataflows', 'gzip_exec_path'))) {
+            $this->markTestSkipped('gzip is not installed');
+        }
+
+        // We have two test files, but also . and .. exist so we need to account for them.
+        $this->assertCount(2 + 2, scandir($this->readdir));
+
+        $dataflow = $this->create_test_dataflow('gzip');
+
+        ob_start();
+        $engine = new engine($dataflow, false, false);
+        $engine->execute();
+        ob_get_clean();
+
+        // Check the files were outputted correctly, by mime type rather than by file extension
+        // (the extension may be misleading). gzip is reported as application/gzip or, by older
+        // libmagic databases, as application/x-gzip.
+        $outdircontenttypes = array_map(function($filename) {
+            $fullpath = $this->outdir . '/' . $filename;
+            return mime_content_type($fullpath);
+        }, scandir($this->outdir));
+
+        $this->assertCount(2, array_filter($outdircontenttypes, function($type) {
+            return in_array($type, ['application/gzip', 'application/x-gzip'], true);
+        }));
+
+        $this->assertCount(2, array_filter($outdircontenttypes, function($type) {
+            return $type == 'text/plain';
+        }));
+    }
+}
+