diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 40b5d5bb4..d01df67f1 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -16,6 +16,11 @@ jobs:
integration-test:
if: ${{ !(contains(github.event.pull_request.labels.*.name, 'WIP') || contains(github.event.pull_request.labels.*.name, 'lint-only')) }}
name: BEE Integration Test
+ strategy:
+ matrix:
+ batch_scheduler: [Slurm, Flux]
+ env:
+ BATCH_SCHEDULER: ${{ matrix.batch_scheduler }}
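+ # The ci/ scripts below are expected to read BATCH_SCHEDULER to decide whether to set up Slurm or Flux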
# Note: Needs to run on 22.04 or later since slurmrestd doesn't seem to be
# available on 20.04
runs-on: ubuntu-22.04
@@ -23,11 +28,11 @@ jobs:
- uses: actions/checkout@v3
- name: Dependency Install
run: ./ci/deps_install.sh
- - name: Slurm Setup and Install
- run: ./ci/slurm_start.sh
+ - name: Batch Scheduler Install and Start
+ run: ./ci/batch_scheduler.sh
- name: BEE Install
run: ./ci/bee_install.sh
- - name: BEE Start
- run: ./ci/bee_start.sh
+ - name: BEE Config
+ run: ./ci/bee_config.sh
- name: Integration Test
run: ./ci/integration_test.sh
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 3a70dd50b..a89ef0c6f 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -16,6 +16,9 @@ jobs:
integration-test:
if: ${{ !(contains(github.event.pull_request.labels.*.name, 'WIP') || contains(github.event.pull_request.labels.*.name, 'lint-only')) }}
name: BEE Unit Tests
+ env:
+ # Unit tests are only run with Slurm right now
+ BATCH_SCHEDULER: Slurm
# Note: Needs to run on 22.04 or later since slurmrestd doesn't seem to be
# available on 20.04
runs-on: ubuntu-22.04
@@ -27,5 +30,7 @@ jobs:
run: ./ci/slurm_start.sh
- name: BEE Install
run: ./ci/bee_install.sh
+ - name: BEE Config
+ run: ./ci/bee_config.sh
- name: Unit tests
run: ./ci/unit_tests.sh
diff --git a/RELEASE.rst b/RELEASE.rst
index 111602a43..d530a37c6 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -16,7 +16,7 @@ Publishing a new release
that matches the version in pyproject.toml
6. Follow step 2 but uncheck Allow specified actors to bypass and don't forget save
7. Finally, on the main branch, first run a ``poetry build`` and then a
- ``poetry publish``. The second command will ask for a username and password
+ ``poetry publish``. The second command will ask for a username and password (you may need to pass the ``--username`` and ``--password`` options to ``poetry publish``)
for PyPI.
Check the documentation at: `https://lanl.github.io/BEE/ `_
@@ -26,3 +26,6 @@ Also upgrade the pip version in your python or anaconda environment and check th
**WARNING**: Once a version is pushed to PyPI, it cannot be undone. You can
'delete' the version from the package settings, but you can no longer publish
an update to that same version.
+
+8. After the version is published, change the version in develop to a pre-release of the next version
+   (for example, if the new version will be 0.1.x, edit the pyproject.toml version to be 0.1.xrc1).
diff --git a/beeflow/client/bee_client.py b/beeflow/client/bee_client.py
index 6c8bed273..e24a74b2b 100644
--- a/beeflow/client/bee_client.py
+++ b/beeflow/client/bee_client.py
@@ -18,10 +18,13 @@
import requests
import typer
-from beeflow.common.config_driver import BeeConfig as bc
+from beeflow.common import config_driver
from beeflow.common.cli import NaturalOrderGroup
from beeflow.common.connection import Connection
-
+from beeflow.common import paths
+from beeflow.common.parser import CwlParser
+from beeflow.common.wf_data import generate_workflow_id
+from beeflow.client import core
# Length of a shortened workflow ID
short_id_len = 6 #noqa: Not a constant
@@ -80,7 +83,7 @@ def error_handler(resp): # noqa (this is an error handler, it doesn't need to r
def _wfm_conn():
"""Return a connection to the WFM."""
- return Connection(bc.get('workflow_manager', 'socket'),
+ return Connection(paths.wfm_socket(),
error_handler=error_handler)
@@ -164,6 +167,8 @@ def match_short_id(wf_id):
app = typer.Typer(no_args_is_help=True, add_completion=False, cls=NaturalOrderGroup)
+app.add_typer(core.app, name='core')
+app.add_typer(config_driver.app, name='config')
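+# These sub-apps expose commands such as `beeflow core start` and `beeflow config new` under the single `beeflow` entry point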
@app.command()
@@ -195,6 +200,13 @@ def is_parent(parent, path):
if not yaml_path.exists():
error_exit(f'YAML file {yaml} does not exist')
+ # Parse workflow
+ parser = CwlParser()
+ workflow_id = generate_workflow_id()
+ workflow, tasks = parser.parse_workflow(workflow_id, str(main_cwl_path),
+ job=str(yaml_path))
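+ # Pre-encode each task so it can be carried in the submission payload sent to the workflow manager below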
+ tasks = [jsonpickle.encode(task) for task in tasks]
+
cwl_indir = is_parent(wf_path, main_cwl_path)
yaml_indir = is_parent(wf_path, yaml_path)
# The CWL and YAML file are already in the workflow directory
@@ -214,6 +226,18 @@ def is_parent(parent, path):
wf_tarball = open(package_path, 'rb')
shutil.rmtree(tempdir_path)
else:
+ # Untar and parse workflow
+ tempdir_path = pathlib.Path(tempfile.mkdtemp())
+ tempdir_wf_path = unpackage(wf_path, tempdir_path)
+ main_cwl_path = pathlib.Path(tempdir_wf_path / main_cwl).resolve()
+ yaml_path = pathlib.Path(tempdir_wf_path / yaml).resolve()
+
+ parser = CwlParser()
+ workflow_id = generate_workflow_id()
+ workflow, tasks = parser.parse_workflow(workflow_id, str(main_cwl_path),
+ job=str(yaml_path))
+
+ shutil.rmtree(tempdir_path)
wf_tarball = open(wf_path, 'rb')
else:
error_exit(f'Workflow tarball {wf_path} cannot be found')
@@ -228,9 +252,9 @@ def is_parent(parent, path):
data = {
'wf_name': wf_name.encode(),
'wf_filename': os.path.basename(wf_path).encode(),
- 'main_cwl': os.path.basename(main_cwl),
- 'yaml': os.path.basename(yaml),
- 'workdir': workdir
+ 'workdir': workdir,
+ 'workflow': jsonpickle.encode(workflow),
+ 'tasks': jsonpickle.encode(tasks, warn=True)
}
files = {
'workflow_archive': wf_tarball
@@ -322,8 +346,30 @@ def package(wf_path: pathlib.Path = typer.Argument(...,
return package_path
-@app.command()
-def listall():
+def unpackage(package_path, dest_path):
+ """Unpackage a workflow tarball for parsing."""
+ package_str = str(package_path)
+ package_path = package_path.resolve()
+
+ if not package_str.endswith('.tgz'):
+ # No cleanup, maybe we should rm dest_path?
+ error_exit("Invalid package name, please use the beeflow package command")
+ wf_dir = package_str[:-4]
+
+ return_code = subprocess.run(['tar', '-C', dest_path, '-xf', package_path],
+ check=True).returncode
+ if return_code != 0:
+ # No cleanup, maybe we should rm dest_path?
+ error_exit("Unpackage failed")
+ else:
+ print(f"Package {package_str} unpackaged successfully")
+
+
+ return dest_path/wf_dir # noqa: Not an arithmetic operation
+
+
+@app.command('list')
+def list_workflows():
"""List all worklfows."""
try:
conn = _wfm_conn()
@@ -494,7 +540,6 @@ def main():
"""Execute bee_client."""
global _INTERACTIVE
_INTERACTIVE = True
- bc.init()
app()
diff --git a/beeflow/cli.py b/beeflow/client/core.py
old mode 100755
new mode 100644
similarity index 72%
rename from beeflow/cli.py
rename to beeflow/client/core.py
index 3d56ba0f2..7805eaf38
--- a/beeflow/cli.py
+++ b/beeflow/client/core.py
@@ -22,11 +22,7 @@
from beeflow.common.config_driver import BeeConfig as bc
from beeflow.common import cli_connection
-
-
-bc.init()
-# Max number of times a component can be restarted
-MAX_RESTARTS = bc.get('DEFAULT', 'max_restarts')
+from beeflow.common import paths
class ComponentManager:
@@ -95,16 +91,18 @@ def run(self, base_components):
def poll(self):
"""Poll each process to check for errors, restart failed processes."""
+ # Max number of times a component can be restarted
+ max_restarts = bc.get('DEFAULT', 'max_restarts')
for name in self.procs: # noqa no need to iterate with items() since self.procs may be set
component = self.components[name]
if component['failed']:
continue
returncode = self.procs[name].poll()
if returncode is not None:
- log = log_fname(name)
+ log = paths.log_fname(name)
print(f'Component "{name}" failed, check log "{log}"')
- if component['restart_count'] >= MAX_RESTARTS:
- print(f'Component "{name}" has been restarted {MAX_RESTARTS} '
+ if component['restart_count'] >= max_restarts:
+ print(f'Component "{name}" has been restarted {max_restarts} '
'times, not restarting again')
component['failed'] = True
else:
@@ -127,9 +125,6 @@ def kill(self):
proc.terminate()
-MGR = ComponentManager()
-
-
def warn(*pargs):
"""Print a red warning message."""
typer.secho(' '.join(pargs), fg=typer.colors.RED, file=sys.stderr)
@@ -142,85 +137,70 @@ def launch_with_gunicorn(module, sock_path, *args, **kwargs):
*args, **kwargs)
-def log_path():
- """Return the main log path."""
- bee_workdir = bc.get('DEFAULT', 'bee_workdir')
- return os.path.join(bee_workdir, 'logs')
-
-
-def log_fname(component):
- """Determine the log file name for the given component."""
- return os.path.join(log_path(), f'{component}.log')
-
-
def open_log(component):
"""Determine the log for the component, open and return it."""
- log = log_fname(component)
+ log = paths.log_fname(component)
return open(log, 'a', encoding='utf-8')
-# Slurmrestd will be started only if we're running with Slurm and
-# slurm::use_commands is not True
-NEED_SLURMRESTD = (bc.get('DEFAULT', 'workload_scheduler') == 'Slurm'
- and not bc.get('slurm', 'use_commands'))
-
-
-@MGR.component('wf_manager', ('scheduler',))
-def start_wfm():
- """Start the WFM."""
- fp = open_log('wf_manager')
- sock_path = bc.get('workflow_manager', 'socket')
- return launch_with_gunicorn('beeflow.wf_manager.wf_manager:create_app()',
- sock_path, stdout=fp, stderr=fp)
-
-
-TM_DEPS = []
-if NEED_SLURMRESTD:
- TM_DEPS.append('slurmrestd')
-
-
-@MGR.component('task_manager', TM_DEPS)
-def start_task_manager():
- """Start the TM."""
- fp = open_log('task_manager')
- sock_path = bc.get('task_manager', 'socket')
- return launch_with_gunicorn('beeflow.task_manager:flask_app', sock_path, stdout=fp, stderr=fp)
-
-
-@MGR.component('scheduler', ())
-def start_scheduler():
- """Start the scheduler."""
- fp = open_log('scheduler')
- sock_path = bc.get('scheduler', 'socket')
- # Using a function here because of the funny way that the scheduler's written
- return launch_with_gunicorn('beeflow.scheduler.scheduler:create_app()', sock_path, stdout=fp,
- stderr=fp)
-
-
-# Workflow manager and task manager need to be opened with PIPE for their stdout/stderr
-if NEED_SLURMRESTD:
- @MGR.component('slurmrestd')
- def start_slurm_restd():
- """Start BEESlurmRestD. Returns a Popen process object."""
- bee_workdir = bc.get('DEFAULT', 'bee_workdir')
- slurmrestd_log = '/'.join([bee_workdir, 'logs', 'restd.log'])
- slurm_socket = bc.get('slurm', 'slurmrestd_socket')
- openapi_version = bc.get('slurm', 'openapi_version')
- slurm_args = f'-s openapi/{openapi_version}'
- subprocess.run(['rm', '-f', slurm_socket], check=True)
- # log.info("Attempting to open socket: {}".format(slurm_socket))
- fp = open(slurmrestd_log, 'w', encoding='utf-8') # noqa
- cmd = ['slurmrestd']
- cmd.extend(slurm_args.split())
- cmd.append(f'unix:{slurm_socket}')
- return subprocess.Popen(cmd, stdout=fp, stderr=fp)
-
-
-def handle_terminate(signum, stack): # noqa
- """Handle a terminate signal."""
- # Kill all subprocesses
- MGR.kill()
- sys.exit(1)
+def need_slurmrestd():
+ """Check if slurmrestd is needed."""
+ return (bc.get('DEFAULT', 'workload_scheduler') == 'Slurm'
+ and not bc.get('slurm', 'use_commands'))
+
+
+def init_components():
+ """Initialize the components and component manager."""
+ mgr = ComponentManager()
+
+ # Slurmrestd will be started only if we're running with Slurm and
+ # slurm::use_commands is not True
+
+ @mgr.component('wf_manager', ('scheduler',))
+ def start_wfm():
+ """Start the WFM."""
+ fp = open_log('wf_manager')
+ return launch_with_gunicorn('beeflow.wf_manager.wf_manager:create_app()',
+ paths.wfm_socket(), stdout=fp, stderr=fp)
+
+ tm_deps = []
+ if need_slurmrestd():
+ tm_deps.append('slurmrestd')
+
+ @mgr.component('task_manager', tm_deps)
+ def start_task_manager():
+ """Start the TM."""
+ fp = open_log('task_manager')
+ return launch_with_gunicorn('beeflow.task_manager:flask_app', paths.tm_socket(),
+ stdout=fp, stderr=fp)
+
+ @mgr.component('scheduler', ())
+ def start_scheduler():
+ """Start the scheduler."""
+ fp = open_log('scheduler')
+ # Using a function here because of the funny way that the scheduler's written
+ return launch_with_gunicorn('beeflow.scheduler.scheduler:create_app()',
+ paths.sched_socket(), stdout=fp, stderr=fp)
+
+ # Workflow manager and task manager need to be opened with PIPE for their stdout/stderr
+ if need_slurmrestd():
+ @mgr.component('slurmrestd')
+ def start_slurm_restd():
+ """Start BEESlurmRestD. Returns a Popen process object."""
+ bee_workdir = bc.get('DEFAULT', 'bee_workdir')
+ slurmrestd_log = '/'.join([bee_workdir, 'logs', 'restd.log'])
+ openapi_version = bc.get('slurm', 'openapi_version')
+ slurm_args = f'-s openapi/{openapi_version}'
+ slurm_socket = paths.slurm_socket()
+ subprocess.run(['rm', '-f', slurm_socket], check=True)
+ # log.info("Attempting to open socket: {}".format(slurm_socket))
+ fp = open(slurmrestd_log, 'w', encoding='utf-8') # noqa
+ cmd = ['slurmrestd']
+ cmd.extend(slurm_args.split())
+ cmd.append(f'unix:{slurm_socket}')
+ return subprocess.Popen(cmd, stdout=fp, stderr=fp)
+
+ return mgr
MIN_CHARLIECLOUD_VERSION = (0, 32)
@@ -248,6 +228,14 @@ def check_dependencies():
warn('This version of Charliecloud is too old, please upgrade to at '
f'least version {version_str(MIN_CHARLIECLOUD_VERSION)}')
sys.exit(1)
+ # Check for the flux API
+ if bc.get('DEFAULT', 'workload_scheduler') == 'Flux':
+ try:
+ import flux # noqa needed to check whether flux api is actually installed
+ except ModuleNotFoundError:
+ warn('Failed to import flux Python API. Please make sure you can '
+ 'use flux in your environment.')
+ sys.exit(1)
class Beeflow:
@@ -263,8 +251,7 @@ def loop(self):
"""Run the main loop."""
print(f'Running on {socket.gethostname()}')
self.mgr.run(self.base_components)
- sock_path = bc.get('DEFAULT', 'beeflow_socket')
- with cli_connection.server(sock_path) as server:
+ with cli_connection.server(paths.beeflow_socket()) as server:
while not self.quit:
# Handle a message from the client, if there is one
self.handle_client(server)
@@ -296,8 +283,14 @@ def handle_client(self, server):
print(f'connection failed: {err}')
-def daemonize(base_components):
+def daemonize(mgr, base_components):
"""Start beeflow as a daemon, monitoring all processes."""
+ def handle_terminate(signum, stack): # noqa
+ """Handle a terminate signal."""
+ # Kill all subprocesses
+ mgr.kill()
+ sys.exit(1)
+
# Now set signal handling, the log and finally daemonize
signal_map = {
signal.SIGINT: handle_terminate,
@@ -306,7 +299,7 @@ def daemonize(base_components):
fp = open_log('beeflow')
with daemon.DaemonContext(signal_map=signal_map, stdout=fp, stderr=fp, stdin=fp,
umask=0o002):
- Beeflow(MGR, base_components).loop()
+ Beeflow(mgr, base_components).loop()
app = typer.Typer(no_args_is_help=True)
@@ -315,11 +308,12 @@ def daemonize(base_components):
@app.command()
def start(foreground: bool = typer.Option(False, '--foreground', '-F',
help='run in the foreground')):
- """Attempt to daemonize if not in debug and start all BEE components."""
- beeflow_log = log_fname('beeflow')
+ """Start all BEE components."""
+ mgr = init_components()
+ beeflow_log = paths.log_fname('beeflow')
check_dependencies()
- sock_path = bc.get('DEFAULT', 'beeflow_socket')
- if bc.get('DEFAULT', 'workload_scheduler') == 'Slurm' and not NEED_SLURMRESTD:
+ sock_path = paths.beeflow_socket()
+ if bc.get('DEFAULT', 'workload_scheduler') == 'Slurm' and not need_slurmrestd():
warn('Not using slurmrestd. Command-line interface will be used.')
# Note: there is a possible race condition here, however unlikely
if os.path.exists(sock_path):
@@ -340,27 +334,26 @@ def start(foreground: bool = typer.Option(False, '--foreground', '-F',
sys.exit(1)
print('Starting beeflow...')
if not foreground:
- print(f'Check "{beeflow_log}" or run `beeflow status` for more information.')
+ print(f'Check "{beeflow_log}" or run `beeflow core status` for more information.')
# Create the log path if it doesn't exist yet
- path = log_path()
+ path = paths.log_path()
os.makedirs(path, exist_ok=True)
base_components = ['wf_manager', 'task_manager', 'scheduler']
if foreground:
try:
- Beeflow(MGR, base_components).loop()
+ Beeflow(mgr, base_components).loop()
except KeyboardInterrupt:
- MGR.kill()
+ mgr.kill()
else:
- daemonize(base_components)
+ daemonize(mgr, base_components)
@app.command()
def status():
"""Check the status of beeflow and the components."""
- sock_path = bc.get('DEFAULT', 'beeflow_socket')
- resp = cli_connection.send(sock_path, {'type': 'status'})
+ resp = cli_connection.send(paths.beeflow_socket(), {'type': 'status'})
if resp is None:
- beeflow_log = log_fname('beeflow')
+ beeflow_log = paths.log_fname('beeflow')
warn('Cannot connect to the beeflow daemon, is it running? Check the '
f'log at "{beeflow_log}".')
sys.exit(1)
@@ -373,21 +366,20 @@ def status():
def stop():
"""Stop the current running beeflow daemon."""
stop_msg = ("\n** Please ensure all workflows are complete before stopping beeflow. **"
- + "\n** Check the status of workflows by running 'beeclient listall'. **"
+ + "\n** Check the status of workflows by running 'beeflow list'. **"
+ "\nAre you sure you want to kill beeflow components? [y/n] ")
ans = input(stop_msg)
if ans.lower() != 'y':
return
- sock_path = bc.get('DEFAULT', 'beeflow_socket')
- resp = cli_connection.send(sock_path, {'type': 'quit'})
+ resp = cli_connection.send(paths.beeflow_socket(), {'type': 'quit'})
if resp is None:
- beeflow_log = log_fname('beeflow')
+ beeflow_log = paths.log_fname('beeflow')
warn('Error: beeflow is not running on this system. It could be '
'running on a different front end.\n'
f' Check the beeflow log: "{beeflow_log}".')
sys.exit(1)
# As long as it returned something, we should be good
- beeflow_log = log_fname('beeflow')
+ beeflow_log = paths.log_fname('beeflow')
print(f'Beeflow has stopped. Check the log at "{beeflow_log}".')
@@ -407,12 +399,3 @@ def version_callback(version: bool = False):
if version:
version = importlib.metadata.version("hpc-beeflow")
print(version)
-
-
-def main():
- """Start the beeflow app."""
- app()
-
-
-if __name__ == '__main__':
- main()
diff --git a/beeflow/common/build/README.md b/beeflow/common/build/README.md
index bc0d4cb1b..2ee8baa83 100644
--- a/beeflow/common/build/README.md
+++ b/beeflow/common/build/README.md
@@ -44,13 +44,6 @@ Each step in a workflow may include a reference to `DockerRequirement` in the CW
A few examples to use for testing:
## CharliecloudBuildDriver Examples
-### Initialize BeeConfig for all the examples:
-
-```
-from beeflow.common.config_driver import BeeConfig as bc
-bc.init()
-```
-
### dockerPull
```
from beeflow.common.build.container_drivers import CharliecloudBuildDriver
diff --git a/beeflow/common/config_driver.py b/beeflow/common/config_driver.py
index 1b476ebca..34994115a 100644
--- a/beeflow/common/config_driver.py
+++ b/beeflow/common/config_driver.py
@@ -77,17 +77,15 @@ def __init__(self, **kwargs):
'BeeConfig is a singleton class. Call BeeConfig.init() once to initialize.'
)
- @classmethod
- def ready(cls):
- """Check if the class has been initialized."""
- return cls.CONFIG is not None
-
@classmethod
def init(cls, userconfig=None, **_kwargs):
"""Initialize BeeConfig class.
We check the platform and read in system and user configuration files.
- If the user configuration file doesn't exist we create it with a [DEFAULT] section.
+ Note that this only needs to be called if one needs to initialize the
+ config from a different file or with different keyword arguments. If
+ so, then this must be called before any calls to bc.get() are made,
+ since that call will initialize the config with default settings.
"""
global USERCONFIG_FILE
if cls.CONFIG is not None:
@@ -100,7 +98,7 @@ def init(cls, userconfig=None, **_kwargs):
with open(USERCONFIG_FILE, encoding='utf-8') as fp:
config.read_file(fp)
except FileNotFoundError:
- sys.exit('Configuration file does not exist! Please try running `beecfg new`.')
+ sys.exit('Configuration file does not exist! Please try running `beeflow config new`.')
# remove default keys from the other sections
default_keys = list(config['DEFAULT'])
config = {sec_name: {key: config[sec_name][key] for key in config[sec_name]
@@ -118,13 +116,13 @@ def userconfig_path(cls):
def get(cls, sec_name, opt_name):
"""Get a configuration value.
- If this throws, then either BeeConfig has not been initialized or a
- configuration value is missing from the definition. Default values
- are built into the ConfigValidator class, so there is no need to
- specify a default or a fallback here.
+ If this throws, then the configuration value is missing from the
+ definition. This call initializes the config if it has not already been
+ initialized. Default values are built into the ConfigValidator class,
+ so there is no need to specify a default or a fallback here.
"""
if cls.CONFIG is None:
- raise RuntimeError('BeeConfig has not been initialized')
+ cls.init()
try:
return cls.CONFIG[sec_name][opt_name] # noqa (this object is subscriptable)
except KeyError:
@@ -222,12 +220,6 @@ def filepath_completion_input(*pargs, **kwargs):
DEFAULT_TM_PORT = 5050 + OFFSET
DEFAULT_SCHED_PORT = 5100 + OFFSET
-SOCKET_PATH = join_path(HOME_DIR, '.beeflow', 'sockets')
-DEFAULT_WFM_SOCKET = join_path(SOCKET_PATH, 'wf_manager.sock')
-DEFAULT_TM_SOCKET = join_path(SOCKET_PATH, 'task_manager.sock')
-DEFAULT_SCHED_SOCKET = join_path(SOCKET_PATH, 'scheduler.sock')
-DEFAULT_BEEFLOW_SOCKET = join_path(SOCKET_PATH, 'beeflow.sock')
-
DEFAULT_BEE_WORKDIR = join_path(HOME_DIR, '.beeflow')
USER = getpass.getuser()
# Create the validator
@@ -235,34 +227,21 @@ def filepath_completion_input(*pargs, **kwargs):
VALIDATOR.section('DEFAULT', info='Default bee.conf configuration section.')
VALIDATOR.option('DEFAULT', 'bee_workdir', info='main BEE workdir',
attrs={'default': DEFAULT_BEE_WORKDIR}, validator=validation.make_dir)
-VALIDATOR.option('DEFAULT', 'workload_scheduler', choices=('Slurm', 'LSF', 'Simple'),
+VALIDATOR.option('DEFAULT', 'workload_scheduler', choices=('Slurm', 'LSF', 'Flux', 'Simple'),
info='backend workload scheduler to interact with ')
VALIDATOR.option('DEFAULT', 'use_archive', validator=validation.bool_, attrs={'default': True},
info='use the BEE archiving functionality')
VALIDATOR.option('DEFAULT', 'bee_dep_image', validator=validation.file_,
info='container image with BEE dependencies',
attrs={'input': filepath_completion_input})
-VALIDATOR.option('DEFAULT', 'beeflow_pidfile',
- attrs={'default': join_path(DEFAULT_BEE_WORKDIR, 'beeflow.pid')},
- info='location of beeflow pidfile')
-VALIDATOR.option('DEFAULT', 'beeflow_socket',
- validator=validation.parent_dir,
- attrs={'default': DEFAULT_BEEFLOW_SOCKET},
- info='location of beeflow socket')
VALIDATOR.option('DEFAULT', 'max_restarts', validator=int,
attrs={'default': 3},
info='max number of times beeflow will restart a component on failure')
# Workflow Manager
VALIDATOR.section('workflow_manager', info='Workflow manager section.')
-VALIDATOR.option('workflow_manager', 'socket', validator=validation.parent_dir,
- attrs={'default': DEFAULT_WFM_SOCKET},
- info='workflow manager port')
# Task manager
VALIDATOR.section('task_manager',
info='Task manager configuration and config of container to use.')
-VALIDATOR.option('task_manager', 'socket',
- attrs={'default': DEFAULT_TM_SOCKET},
- validator=validation.parent_dir, info='task manager listen port')
VALIDATOR.option('task_manager', 'container_runtime', attrs={'default': 'Charliecloud'},
choices=('Charliecloud', 'Singularity'),
info='container runtime to use for configuration')
@@ -337,31 +316,16 @@ def validate_chrun_opts(opts):
attrs={'default': shutil.which('slurmrestd') is None},
info='if set, use slurm cli commands instead of slurmrestd')
DEFAULT_SLURMRESTD_SOCK = join_path('/tmp', f'slurm_{USER}_{random.randint(1, 10000)}.sock')
-VALIDATOR.option('slurm', 'slurmrestd_socket', validator=validation.parent_dir,
- attrs={'default': DEFAULT_SLURMRESTD_SOCK},
- info='socket location')
VALIDATOR.option('slurm', 'openapi_version', attrs={'default': 'v0.0.37'},
info='openapi version to use for slurmrestd')
# Scheduler
VALIDATOR.section('scheduler', info='Scheduler configuration section.')
-VALIDATOR.option('scheduler', 'log',
- attrs={'default': join_path(DEFAULT_BEE_WORKDIR, 'logs', 'scheduler.log')},
- info='scheduler log file')
-VALIDATOR.option('scheduler', 'socket', validator=validation.parent_dir,
- attrs={'default': DEFAULT_SCHED_SOCKET},
- info='scheduler socket')
-VALIDATOR.option('scheduler', 'alloc_logfile',
- attrs={'default': join_path(DEFAULT_BEE_WORKDIR, 'logs', 'scheduler_alloc.log')},
- info='allocation logfile, to be used for later training')
SCHEDULER_ALGORITHMS = ('fcfs', 'backfill', 'sjf')
VALIDATOR.option('scheduler', 'algorithm', attrs={'default': 'fcfs'}, choices=SCHEDULER_ALGORITHMS,
info='scheduling algorithm to use')
VALIDATOR.option('scheduler', 'default_algorithm', attrs={'default': 'fcfs'},
choices=SCHEDULER_ALGORITHMS,
info=('default algorithm to use'))
-VALIDATOR.option('scheduler', 'workdir',
- attrs={'default': join_path(DEFAULT_BEE_WORKDIR, 'scheduler')},
- info='workdir to be used for the scheduler')
def print_wrap(text, next_line_indent=''):
@@ -482,11 +446,11 @@ def save(self):
f'\n\t{self.fname}',
'\n ** See documentation for values you should refrain from editing! **',
'\n ** Include job options (such as account) required for this system.**')
- print('\n(Try `beecfg info` to see more about each option)')
+ print('\n(Try `beeflow config info` to see more about each option)')
print(70 * '#')
-app = typer.Typer(no_args_is_help=False, add_completion=False, cls=NaturalOrderGroup)
+app = typer.Typer(no_args_is_help=True, add_completion=False, cls=NaturalOrderGroup)
@app.command()
@@ -541,20 +505,11 @@ def show(path: str = typer.Argument(default=USERCONFIG_FILE,
help='Path to config file')):
"""Show the contents of bee.conf."""
if not os.path.exists(path):
- print('The bee.conf does not exist yet. Please run `beecfg new`.')
+ print('The bee.conf does not exist yet. Please run `beeflow config new`.')
return
print(f'# {path}')
with open(path, encoding='utf-8') as fp:
print(fp.read(), end='')
-
-
-def main():
- """Entry point for config validation and help."""
- app()
-
-
-if __name__ == '__main__':
- app()
# Ignore C901: "'ConfigGenerator.choose_values' is too complex" - I disagree, if
# it's just based on LOC, then there are a number `print()` functions
# that are increasing the line count
diff --git a/beeflow/common/crt/charliecloud_driver.py b/beeflow/common/crt/charliecloud_driver.py
index c8afceb5b..19d0fafe9 100644
--- a/beeflow/common/crt/charliecloud_driver.py
+++ b/beeflow/common/crt/charliecloud_driver.py
@@ -4,6 +4,7 @@
"""
import os
+import yaml
from beeflow.common.crt.crt_driver import (ContainerRuntimeDriver, ContainerRuntimeResult,
Command, CommandType)
from beeflow.common.config_driver import BeeConfig as bc
@@ -48,6 +49,9 @@ def run_text(self, task): # noqa
use_container = None
task_container_name = task.get_requirement('DockerRequirement', 'beeflow:containerName')
+ bind_mounts = task.get_requirement('DockerRequirement', 'beeflow:bindMounts')
+ bind_mounts = (yaml.load(bind_mounts, Loader=yaml.SafeLoader)
+ if bind_mounts is not None else {})
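+ # beeflow:bindMounts is assumed to be a small YAML mapping of host paths to container paths (e.g. {'/on/host': '/in/container'}); an empty dict means no extra bind mounts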
baremetal = False
if task_container_name is None:
@@ -108,9 +112,13 @@ def run_text(self, task): # noqa
deployed_image_root = bc.get('builder', 'deployed_image_root')
hints = dict(task.hints)
- mpi_opt = '--join' if 'beeflow:MPIRequirement' in hints else ''
+ # --join is only supported with Slurm (maybe this logic shouldn't be in here)
+ if bc.get('DEFAULT', 'workload_scheduler') == 'Slurm':
+ mpi_opt = '--join' if 'beeflow:MPIRequirement' in hints else ''
+ else:
+ mpi_opt = ''
command = ' '.join(task.command)
- env_code = ''.join([self.cc_setup if self.cc_setup else '', task_workdir_env])
+ env_code = '\n'.join([self.cc_setup if self.cc_setup else '', task_workdir_env])
deployed_path = deployed_image_root + '/' + task_container_name
pre_commands = [
Command(f'mkdir -p {deployed_image_root}\n'.split(), CommandType.ONE_PER_NODE),
@@ -127,8 +135,10 @@ def run_text(self, task): # noqa
}
ctr_workdir_path = convert_path(task.workdir, bind_mounts)
extra_opts = f'--cd {ctr_workdir_path}'
+ bind_mount_opts = ' '.join(f'-b {path_a}:{path_b}'
+ for path_a, path_b in bind_mounts.items())
main_command = (f'ch-run {mpi_opt} {deployed_path} {self.chrun_opts} '
- f'{extra_opts} -- {command}\n').split()
+ f'{extra_opts} {bind_mount_opts} -- {command}\n').split()
main_command = Command(main_command)
post_commands = [
Command(f'rm -rf {deployed_path}\n'.split(), type_=CommandType.ONE_PER_NODE),
diff --git a/beeflow/common/db/bdb.py b/beeflow/common/db/bdb.py
index 3bed96c4b..f38dc47d2 100644
--- a/beeflow/common/db/bdb.py
+++ b/beeflow/common/db/bdb.py
@@ -2,8 +2,6 @@
import sqlite3
from sqlite3 import Error
-from beeflow.common.config_driver import BeeConfig as bc
-bc.init()
def connect_db(module, db_path):
diff --git a/beeflow/common/gdb/neo4j_cypher.py b/beeflow/common/gdb/neo4j_cypher.py
index e7701b551..f8c8913a8 100644
--- a/beeflow/common/gdb/neo4j_cypher.py
+++ b/beeflow/common/gdb/neo4j_cypher.py
@@ -218,17 +218,17 @@ def add_dependencies(tx, task, old_task=None, restarted_task=False):
"MATCH (t:Task)<-[:OUTPUT_OF]-(o:Output) "
"WITH s, t, sources, collect(o.id) as outputs "
"WHERE any(input IN sources WHERE input IN outputs) "
- "MERGE (s)-[:DEPENDS_ON]->(t) "
- "WITH s "
- "MATCH (s)<-[:OUTPUT_OF]-(o:Output) "
- "WITH s, collect(o.id) AS outputs "
- "MATCH (t:Task)<-[:INPUT_OF]-(i:Input) "
- "WITH s, t, outputs, collect(i.source) as sources "
- "WHERE any(output IN outputs WHERE output IN sources) "
- "MERGE (t)-[:DEPENDS_ON]->(s)")
+ "MERGE (s)-[:DEPENDS_ON]->(t)")
+ dependent_query = ("MATCH (s:Task {id: $task_id})<-[:OUTPUT_OF]-(o:Output) "
+ "WITH s, collect(o.id) AS outputs "
+ "MATCH (t:Task)<-[:INPUT_OF]-(i:Input) "
+ "WITH s, t, outputs, collect(i.source) as sources "
+ "WHERE any(output IN outputs WHERE output IN sources) "
+ "MERGE (t)-[:DEPENDS_ON]->(s)")
tx.run(begins_query, task_id=task.id)
tx.run(dependency_query, task_id=task.id)
+ tx.run(dependent_query, task_id=task.id)
def get_task_by_id(tx, task_id):
diff --git a/beeflow/common/parser/parser.py b/beeflow/common/parser/parser.py
index ab63f8bb7..9c58e8d51 100644
--- a/beeflow/common/parser/parser.py
+++ b/beeflow/common/parser/parser.py
@@ -14,19 +14,15 @@
import cwl_utils.parser.cwl_v1_2 as cwl_parser
from schema_salad.exceptions import ValidationException # noqa (pylama can't find the exception)
-from beeflow.common.config_driver import BeeConfig as bc
-from beeflow.common.wf_data import (InputParameter,
+from beeflow.common.wf_data import (Workflow,
+ Task,
+ InputParameter,
OutputParameter,
StepInput,
StepOutput,
Hint,
Requirement,
generate_workflow_id)
-from beeflow.wf_manager.resources import wf_utils
-
-
-if not bc.ready():
- bc.init()
# Map CWL types to Python types
@@ -52,7 +48,7 @@ def __init__(self, *args):
class CwlParser:
"""Class for parsing CWL files."""
- def __init__(self, wfi):
+ def __init__(self):
"""Initialize the CWL parser interface.
Sets the workflow interface for communication with the graph database.
@@ -61,7 +57,6 @@ def __init__(self, wfi):
self.path = None
self.steps = []
self.params = None
- self._wfi = wfi
def parse_workflow(self, workflow_id, cwl_path, job=None):
"""Parse a CWL Workflow file and load it into the graph database.
@@ -99,7 +94,7 @@ def resolve_input(input_, type_):
:type input_: WorkflowInputParameter
:param type_: the workflow input type
:type type_: str
- :rtype: str or int or float
+ :rtype: (Workflow, list of Task)
"""
# Use parsed input parameter for input value if it exists
input_id = _shortname(input_.id)
@@ -120,14 +115,13 @@ def resolve_input(input_, type_):
workflow_hints = self.parse_requirements(self.cwl.hints, as_hints=True)
workflow_requirements = self.parse_requirements(self.cwl.requirements)
- self._wfi.initialize_workflow(workflow_id, workflow_name, workflow_inputs,
- workflow_outputs, workflow_requirements, workflow_hints)
- for step in self.cwl.steps:
- self.parse_step(step)
+ workflow = Workflow(workflow_name, workflow_hints, workflow_requirements, workflow_inputs,
+ workflow_outputs, workflow_id)
+ tasks = [self.parse_step(step, workflow_id) for step in self.cwl.steps]
- return self._wfi
+ return workflow, tasks
- def parse_step(self, step):
+ def parse_step(self, step, workflow_id):
"""Parse a CWL step object.
Calling this to parse a CommandLineTool file without a corresponding
@@ -135,6 +129,9 @@ def parse_step(self, step):
:param step: the CWL step object
:type step: WorkflowStep
+ :param workflow_id: the workflow ID
+ :type workflow_id: str
+ :rtype: Task
"""
# Parse CWL file specified by run field, else parse run field as inline CommandLineTool
if isinstance(step.run, str):
@@ -160,9 +157,8 @@ def parse_step(self, step):
step_stdout = step_cwl.stdout
step_stderr = step_cwl.stderr
- self._wfi.add_task(step_name, base_command=step_command, inputs=step_inputs,
- outputs=step_outputs, requirements=step_requirements,
- hints=step_hints, stdout=step_stdout, stderr=step_stderr)
+ return Task(step_name, step_command, step_hints, step_requirements, step_inputs,
+ step_outputs, step_stdout, step_stderr, workflow_id)
def parse_job(self, job):
"""Parse a CWL input job file.
@@ -280,6 +276,18 @@ def parse_step_outputs(cwl_out, step_outputs, stdout, stderr):
return outputs
+ def _read_requirement_file(self, key, items):
+ """Read in a requirement file and replace it in the parsed items."""
+ base_path = os.path.dirname(self.path)
+ fname = items[key]
+ path = os.path.join(base_path, fname)
+ try:
+ with open(path, encoding='utf-8') as fp:
+ items[key] = fp.read()
+ except FileNotFoundError:
+ msg = f'Could not find a file for {key}: {fname}'
+ raise CwlParseError(msg) from None
+
def parse_requirements(self, requirements, as_hints=False):
"""Parse CWL hints/requirements.
@@ -294,22 +302,16 @@ def parse_requirements(self, requirements, as_hints=False):
return reqs
if as_hints:
for req in requirements:
- items = {k: v for k, v in req.items() if k != "class"}
+ items = {k: str(v) for k, v in req.items() if k != "class"}
# Load in the dockerfile at parse time
if 'dockerFile' in items:
- base_path = os.path.dirname(self.path)
- docker_file = items['dockerFile']
- path = os.path.join(base_path, docker_file)
- try:
- with open(path, encoding='utf-8') as fp:
- items['dockerFile'] = fp.read()
- except FileNotFoundError:
- msg = f'Could not find the docker file: {docker_file}'
- raise CwlParseError(msg) from None
+ self._read_requirement_file('dockerFile', items)
+ if 'beeflow:bindMounts' in items:
+ self._read_requirement_file('beeflow:bindMounts', items)
reqs.append(Hint(req['class'], items))
else:
for req in requirements:
- reqs.append(Requirement(req.class_, {k: v for k, v in vars(req).items()
+ reqs.append(Requirement(req.class_, {k: str(v) for k, v in vars(req).items()
if k not in ("extension_fields",
"loadingOptions", "class_")
and v is not None}))
@@ -350,10 +352,14 @@ def parse_args(args=None):
def main():
"""Run the parser on a CWL Workflow and job file directly."""
wf_id = generate_workflow_id()
- wfi = wf_utils.get_workflow_interface(wf_id)
- parser = CwlParser(wfi)
+ parser = CwlParser()
args = parse_args()
- parser.parse_workflow(wf_id, args.wf_file, args.inputs)
+ workflow, tasks = parser.parse_workflow(wf_id, args.wf_file, args.inputs)
+ print("Parsed workflow:")
+ print(workflow)
+
+ print("Parsed tasks:")
+ print(tasks)
if __name__ == "__main__":
diff --git a/beeflow/common/paths.py b/beeflow/common/paths.py
new file mode 100644
index 000000000..8a5c39e9d
--- /dev/null
+++ b/beeflow/common/paths.py
@@ -0,0 +1,51 @@
+"""Centralized path data for everything stored in the workdir."""
+import os
+from beeflow.common.config_driver import BeeConfig as bc
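+# Typical usage (illustrative): paths.wfm_socket() returns the Workflow Manager's
+# socket path under <bee_workdir>/sockets; the directory is created on first use.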
+
+
+def _workdir():
+ """Return the workdir."""
+ return bc.get('DEFAULT', 'bee_workdir')
+
+
+def _sockdir():
+ """Return the socket directory."""
+ sockdir = os.path.join(_workdir(), 'sockets')
+ os.makedirs(sockdir, exist_ok=True)
+ return sockdir
+
+
+def beeflow_socket():
+ """Return the socket for the beeflow daemon."""
+ return os.path.join(_sockdir(), 'beeflow.sock')
+
+
+def wfm_socket():
+ """Get the socket path for the Workflow Manager."""
+ return os.path.join(_sockdir(), 'wf_manager.sock')
+
+
+def tm_socket():
+ """Get the socket path for the Task Manager."""
+ return os.path.join(_sockdir(), 'task_manager.sock')
+
+
+def sched_socket():
+ """Get the scheduler socket."""
+ return os.path.join(_sockdir(), 'scheduler.sock')
+
+
+def slurm_socket():
+ """Get the slurm socket (for slurmrestd)."""
+ return os.path.join(_sockdir(), 'slurmrestd.sock')
+
+
+def log_path():
+ """Return the main log path."""
+ bee_workdir = bc.get('DEFAULT', 'bee_workdir')
+ return os.path.join(bee_workdir, 'logs')
+
+
+def log_fname(component):
+ """Determine the log file name for the given component."""
+ return os.path.join(log_path(), f'{component}.log')
diff --git a/beeflow/common/wf_data.py b/beeflow/common/wf_data.py
index 915ca67c3..17092d684 100644
--- a/beeflow/common/wf_data.py
+++ b/beeflow/common/wf_data.py
@@ -121,9 +121,9 @@ def __init__(self, name, base_command, hints, requirements, inputs, outputs, std
:param workflow_id: the workflow ID
:type workflow_id: str
:param task_id: the task ID
- :type task_id: str
+ :type task_id: str, optional
:param workdir: the working directory from which to get and store data
- :type workdir: path
+ :type workdir: path, optional
"""
self.name = name
self.base_command = base_command
diff --git a/beeflow/common/wf_interface.py b/beeflow/common/wf_interface.py
index 224c5fdba..38849342c 100644
--- a/beeflow/common/wf_interface.py
+++ b/beeflow/common/wf_interface.py
@@ -5,8 +5,6 @@
import re
-from beeflow.common.wf_data import Workflow, Task
-
class WorkflowInterface:
"""Interface for manipulating workflows."""
@@ -28,36 +26,22 @@ def reconnect(self):
"""Reconnect to the graph database using stored credentials."""
raise NotImplementedError()
- def initialize_workflow(self, workflow_id, name, inputs, outputs, requirements=None,
- hints=None):
+ def initialize_workflow(self, workflow):
"""Begin construction of a BEE workflow.
- :param workflow_id: the pre-generated workflow ID
- :type workflow_id: str
- :param name: the workflow name
- :type name: str
- :param inputs: the inputs to the workflow
- :type inputs: list of InputParameter
- :param outputs: the outputs of the workflow
- :type outputs: list of OutputParameter
- :param requirements: the workflow requirements
- :type requirements: list of Requirement
- :param hints: the workflow hints (optional requirements)
- :type hints: list of Hint
- :rtype: Workflow
+ :param workflow: the workflow object
+ :type workflow: Workflow
"""
if self.workflow_loaded():
raise RuntimeError("attempt to re-initialize existing workflow")
- if requirements is None:
- requirements = []
- if hints is None:
- hints = []
+ if workflow.requirements is None:
+ workflow.requirements = []
+ if workflow.hints is None:
+ workflow.hints = []
- workflow = Workflow(name, hints, requirements, inputs, outputs, workflow_id)
- self._workflow_id = workflow_id
+ self._workflow_id = workflow.id
# Load the new workflow into the graph database
self._gdb_interface.initialize_workflow(workflow)
- return workflow
def execute_workflow(self):
"""Begin execution of a BEE workflow."""
@@ -82,43 +66,24 @@ def finalize_workflow(self):
self._workflow_id = None
self._gdb_interface.cleanup()
- def add_task(self, name, base_command, inputs, outputs, requirements=None, hints=None,
- stdout=None, stderr=None):
+ def add_task(self, task):
"""Add a new task to a BEE workflow.
- :param name: the name given to the task
- :type name: str
- :param base_command: the base command for the task
- :type base_command: str or list of str
- :param requirements: the task-specific requirements
- :type requirements: list of Requirement
- :param hints: the task-specific hints (optional requirements)
- :type hints: list of Hint
- :param inputs: the task inputs
- :type inputs: list of StepInput
- :param outputs: the task outputs
- :type outputs: list of StepOutput
- :param stdout: the name of the file to which to redirect stdout
- :type stdout: str
- :param stderr: the name of the file to which to redirect stderr
- :type stderr: str
- :rtype: Task
+ :param task: the task to add to the workflow
+ :type task: Task
"""
# Immutable default arguments
- if inputs is None:
- inputs = []
- if outputs is None:
- outputs = []
- if requirements is None:
- requirements = []
- if hints is None:
- hints = []
-
- task = Task(name, base_command, hints, requirements, inputs, outputs, stdout, stderr,
- self._workflow_id)
+ if task.inputs is None:
+ task.inputs = []
+ if task.outputs is None:
+ task.outputs = []
+ if task.requirements is None:
+ task.requirements = []
+ if task.hints is None:
+ task.hints = []
+
# Load the new task into the graph database
self._gdb_interface.load_task(task)
- return task
def restart_task(self, task, checkpoint_file):
"""Restart a failed BEE workflow task.
diff --git a/beeflow/common/worker/__init__.py b/beeflow/common/worker/__init__.py
index fbc908b1f..6e7d4ffd8 100644
--- a/beeflow/common/worker/__init__.py
+++ b/beeflow/common/worker/__init__.py
@@ -2,12 +2,14 @@
from beeflow.common.worker.slurm_worker import SlurmWorker
from beeflow.common.worker.lsf_worker import LSFWorker
+from beeflow.common.worker.flux_worker import FluxWorker
from beeflow.common.worker.simple_worker import SimpleWorker
supported_workload_schedulers = {
'Slurm': SlurmWorker,
'LSF': LSFWorker,
+ 'Flux': FluxWorker,
'Simple': SimpleWorker,
}
diff --git a/beeflow/common/worker/flux_worker.py b/beeflow/common/worker/flux_worker.py
new file mode 100644
index 000000000..353f38214
--- /dev/null
+++ b/beeflow/common/worker/flux_worker.py
@@ -0,0 +1,132 @@
+"""Flux worker interface."""
+
+import os
+from beeflow.common import log as bee_logging
+from beeflow.common.worker.worker import Worker
+
+log = bee_logging.setup(__name__)
+
+# Map from flux states to BEE statuses
+BEE_STATES = {
+ 'NEW': 'PENDING',
+ 'DEPEND': 'PENDING',
+ 'PRIORITY': 'PENDING',
+ 'SCHED': 'PENDING',
+ 'RUN': 'RUNNING',
+ 'CLEANUP': 'COMPLETING',
+ 'INACTIVE': 'COMPLETED',
+ 'COMPLETED': 'COMPLETED',
+ 'FAILED': 'FAILED',
+ 'CANCELED': 'CANCELED',
+ 'TIMEOUT': 'TIMEOUT',
+}
+
+
+class FluxWorker(Worker):
+ """Flux worker code."""
+
+ def __init__(self, **kwargs):
+ """Initialize the flux worker object."""
+ super().__init__(**kwargs)
+ # Only try to import the Flux API if we need it
+ import flux # noqa this is necessary since flux may not be installed
+ from flux import job # noqa
+ self.flux = flux
+ self.job = job
+
+ def build_text(self, task):
+ """Build text for task script."""
+ # TODO: Not used for the Flux worker
+
+ def build_jobspec(self, task):
+ """Build the job spec for a task."""
+ # TODO: This has a lot of code in common with the other worker's build_text
+ crt_res = self.crt.run_text(task)
+ script = [
+ '#!/bin/bash',
+ 'set -e',
+ crt_res.env_code,
+ ]
+
+ # TODO: Should this entire model, saving stdout and stderr to files, be
+ # redone for Flux? It seems to provide some sort of KVS for storing
+ # output but I don't quite understand it.
+
+ # Get resource requirements
+ nodes = task.get_requirement('beeflow:MPIRequirement', 'nodes', default=1)
+ # TODO: 'ntasks' may not mean the same thing as with Slurm
+ ntasks = task.get_requirement('beeflow:MPIRequirement', 'ntasks', default=nodes)
+ # TODO: What to do with the MPI version?
+ # mpi_version = task.get_requirement('beeflow:MPIRequirement', 'mpiVersion',
+ # default='pmi2')
+
+ for cmd in crt_res.pre_commands:
+ if cmd.type == 'one-per-node':
+ cmd_args = ['flux', 'run', '-N', str(nodes), '-n', str(nodes), ' '.join(cmd.args)]
+ else:
+ cmd_args = ['flux', 'run', ' '.join(cmd.args)]
+ script.append(' '.join(cmd_args))
+
+ # Set up the main command
+ args = ['flux', 'run', '-N', str(nodes), '-n', str(ntasks)]
+ if task.stdout is not None:
+ args.extend(['--output', task.stdout])
+ if task.stderr is not None:
+ args.extend(['--error', task.stderr])
+ args.extend(crt_res.main_command.args)
+ log.info(args)
+ script.append(' '.join(args))
+
+ for cmd in crt_res.post_commands:
+ if cmd.type == 'one-per-node':
+ cmd_args = ['flux', 'run', '-N', str(nodes), '-n', str(nodes), ' '.join(cmd.args)]
+ else:
+ cmd_args = ['flux', 'run', ' '.join(cmd.args)]
+ script.append(' '.join(cmd_args))
+
+ script = '\n'.join(script)
+ jobspec = self.job.JobspecV1.from_batch_command(script, task.name,
+ num_slots=ntasks,
+ num_nodes=nodes)
+ task_save_path = self.task_save_path(task)
+ jobspec.stdout = f'{task_save_path}/{task.name}-{task.id}.out'
+ jobspec.stderr = f'{task_save_path}/{task.name}-{task.id}.err'
+ jobspec.environment = dict(os.environ)
+ # Save the script for later reference
+ with open(f'{task_save_path}/{task.name}-{task.id}.sh', 'w', encoding='utf-8') as fp:
+ fp.write(script)
+ return jobspec
+
+ def submit_task(self, task):
+ """Worker submits task; returns job_id, job_state."""
+ log.info(f'Submitting task: {task.name}')
+ jobspec = self.build_jobspec(task)
+ flux = self.flux.Flux()
+ job_id = self.job.submit(flux, jobspec)
+ return job_id, self.query_task(job_id)
+
+ def cancel_task(self, job_id):
+ """Cancel task with job_id; returns job_state."""
+ log.info(f'Cancelling task with ID: {job_id}')
+ flux = self.flux.Flux()
+ self.job.cancel(flux, job_id)
+ return 'CANCELED'
+
+ def query_task(self, job_id):
+ """Query job state for the task."""
+ # TODO: How does Flux handle TIMEOUT/TIMELIMIT? They don't seem to have
+ # a state for this
+ log.info(f'Querying task with job_id: {job_id}')
+ flux = self.flux.Flux()
+ info = self.job.get_job(flux, job_id)
+ log.info(info)
+
+ # TODO: May need to check for return codes other than 0 if
+ # specified by the task (although I'm not sure how we can keep
+ # track of this with job ID alone)
+
+ # Note: using 'status' here instead of 'state'
+ return BEE_STATES[info['status']]
+# Ignoring W0511: TODO's are needed here to indicate parts of the code that may
+# need more work or thought
+# pylama:ignore=W0511
diff --git a/beeflow/common/worker/slurm_worker.py b/beeflow/common/worker/slurm_worker.py
index b4a752516..74ca4ef96 100644
--- a/beeflow/common/worker/slurm_worker.py
+++ b/beeflow/common/worker/slurm_worker.py
@@ -42,7 +42,7 @@ def build_text(self, task):
else:
main_command_srun_args = []
nodes = task.get_requirement('beeflow:MPIRequirement', 'nodes', default=1)
- ntasks = task.get_requirement('beeflow:MPIRequirement', 'ntasks', default=1)
+ ntasks = task.get_requirement('beeflow:MPIRequirement', 'ntasks', default=nodes)
mpi_version = task.get_requirement('beeflow:MPIRequirement', 'mpiVersion', default='pmi2')
time_limit = task.get_requirement('beeflow:SchedulerRequirement', 'timeLimit',
default=self.default_time_limit)
@@ -144,6 +144,7 @@ def query_task(self, job_id):
try:
resp = self.session.get(f'{self.slurm_url}/job/{job_id}')
+ log.info(resp.text)
if resp.status_code != 200:
raise WorkerError(f'Failed to query job {job_id}')
data = json.loads(resp.text)
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/Dockerfile.pennant-flux-x86_64 b/beeflow/data/cwl/bee_workflows/pennant-build/Dockerfile.pennant-flux-x86_64
new file mode 100644
index 000000000..182441fe1
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/Dockerfile.pennant-flux-x86_64
@@ -0,0 +1,130 @@
+# Based on example from @qwofford
+FROM almalinux:8
+
+RUN dnf install -y 'dnf-command(config-manager)' epel-release \
+ && dnf config-manager --set-enabled powertools epel \
+ && dnf install -y \
+ gcc \
+ gcc-c++ \
+ gcc-gfortran \
+ make \
+ git \
+ binutils \
+ libtool \
+ pkgconf \
+ autoconf \
+ automake \
+ ibacm \
+ libevent-devel \
+ libibumad-devel \
+ libibverbs-devel \
+ librdmacm-devel \
+ libibverbs-utils \
+ rdma-core \
+ numactl-devel \
+ perl \
+ cmake \
+ bzip2 \
+ ca-certificates \
+ wget \
+ autoconf \
+ automake \
+ libtool \
+ make \
+ pkgconfig \
+ glibc-devel \
+ zeromq-devel \
+ czmq-devel \
+ libuuid-devel \
+ jansson-devel \
+ lz4-devel \
+ libarchive-devel \
+ hwloc-devel \
+ sqlite-devel \
+ lua \
+ lua-devel \
+ lua-posix \
+ python3-devel \
+ python3-sphinx \
+ python3-cffi \
+ python3-yaml \
+ python3-jsonschema \
+ aspell \
+ aspell-en \
+ valgrind-devel \
+ mpich-devel \
+ jq \
+ ncurses-devel \
+ && dnf clean all
+
+ARG NUMJOBS=8
+
+# Build flux-core
+ARG FLUX_CORE_VERSION=0.50.0
+ARG FLUX_CORE_PREFIX=/usr
+RUN git clone https://github.com/flux-framework/flux-core.git \
+ && cd flux-core \
+ && git checkout v0.50.0 \
+ && ./autogen.sh \
+ && ./configure --prefix=${FLUX_CORE_PREFIX} \
+ && make -j${NUMJOBS} install \
+ && cd .. \
+ && rm -rf flux-core
+
+# Build UCX
+ARG UCX_VERSION=1.13.1
+ARG UCX_PREFIX=/usr/local
+RUN wget --no-check-certificate https://github.com/openucx/ucx/releases/download/v1.13.1/ucx-1.13.1.tar.gz \
+ && tar -xvf ucx-${UCX_VERSION}.tar.gz \
+ && cd ucx-${UCX_VERSION} \
+ && ./contrib/configure-release --prefix=${UCX_PREFIX} \
+ && make -j${NUMJOBS} install \
+ && cd ../ \
+ && rm -rf ucx-${UCX_VERSION}
+
+# Build PMI2 from Slurm
+ARG SLURM_VERSION=22.05.8
+ARG SLURM_PREFIX=/usr/local
+RUN wget --no-check-certificate https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 \
+ && tar -xvf slurm-${SLURM_VERSION}.tar.bz2 \
+ && cd slurm-${SLURM_VERSION} \
+ && ./configure --prefix=${SLURM_PREFIX} \
+ && cd contribs/pmi2 \
+ && make -j${NUMJOBS} install \
+ && cd ../../../ \
+ && rm -rf slurm-${SLURM_VERSION}
+
+# Now build openmpi
+ARG OMPI_VERSION=4.1.4
+ARG OMPI_PREFIX=/usr/local
+RUN wget --no-check-certificate https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OMPI_VERSION}.tar.bz2 \
+ && tar -xvf openmpi-${OMPI_VERSION}.tar.bz2 \
+ && cd openmpi-${OMPI_VERSION} \
+ && CFLAGS=-O3 \
+ CXXFLAGS=-O3 \
+ ./configure --prefix=${OMPI_PREFIX} \
+ --with-slurm \
+ --with-flux-pmi \
+ --with-pmi=${SLURM_PREFIX} \
+ --with-pmix \
+ --with-ucx=${UCX_PREFIX} \
+ --disable-pty-support \
+ --enable-mca-no-build=btl-openib,plm-slurm \
+ && make -j${NUMJOBS} install \
+ && cd ../ \
+ && rm -rf openmpi-${OMPI_VERSION}
+RUN ldconfig
+
+# Build CoMD (the MPI version)
+#ARG REPO=https://github.com/ECP-copa/CoMD.git
+#RUN git clone $REPO && \
+# cd CoMD && \
+# cp src-mpi/Makefile.vanilla src-mpi/Makefile && \
+# make -C src-mpi
+
+ARG REPO=https://github.com/lanl/PENNANT.git
+RUN git clone $REPO && \
+ cd PENNANT && \
+ sed -i '16,19s/^#//' Makefile && \
+ sed -i '22,25s/^/#/' Makefile && \
+ make
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/Dockerfile.pennant-graph-x86_64 b/beeflow/data/cwl/bee_workflows/pennant-build/Dockerfile.pennant-graph-x86_64
new file mode 100644
index 000000000..a29ebb0f0
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/Dockerfile.pennant-graph-x86_64
@@ -0,0 +1,30 @@
+# Build a container with matplotlib for graphing
+#
+# `ch-image build --force -f Dockerfile.pennant-graph-x86_64 -t pennant-graph .`
+FROM almalinux:8
+
+RUN dnf update -y \
+ && dnf install -y \
+ gcc \
+ gcc-c++ \
+ binutils \
+ libtool \
+ autoconf \
+ automake \
+ cmake \
+ pkgconf \
+ bzip2-devel \
+ zlib-devel \
+ libjpeg-devel \
+ libpng-devel \
+ python3 \
+ python3-devel
+
+RUN python3 -m venv /venv \
+ && echo ". /venv/bin/activate" >> /etc/profile.d/venv.sh \
+ && . /venv/bin/activate \
+ && pip install matplotlib
+
+COPY graph_pennant.py graph_pennant.sh /
+
+RUN chmod 755 /graph_pennant.sh
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/graph.cwl b/beeflow/data/cwl/bee_workflows/pennant-build/graph.cwl
new file mode 100644
index 000000000..f6cedd9df
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/graph.cwl
@@ -0,0 +1,27 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /graph_pennant.sh
+
+inputs:
+ out1node:
+ type: File
+ inputBinding:
+ position: 1
+ out2node:
+ type: File
+ inputBinding:
+ position: 2
+ out4node:
+ type: File
+ inputBinding:
+ position: 3
+ out8node:
+ type: File
+ inputBinding:
+ position: 4
+outputs:
+ image:
+ type: File
+ outputBinding:
+ glob: graph.png
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/graph_pennant.py b/beeflow/data/cwl/bee_workflows/pennant-build/graph_pennant.py
new file mode 100644
index 000000000..c1fc92a56
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/graph_pennant.py
@@ -0,0 +1,43 @@
+"""Graph the output of a PENNANT workflow."""
+import re
+import sys
+import matplotlib.pyplot as plt
+
+
+results = []
+for fname in sys.argv[1:]:
+ pe_count = 0
+ times = []
+ with open(fname, encoding='utf-8') as fp:
+ for line in fp:
+ # Check for the PE count
+ m_pe_count = re.match(r'Running on (\d+) MPI PE\(s\)', line)
+ if m_pe_count:
+ pe_count = int(m_pe_count.group(1))
+ continue
+ # Check for an End cycle line
+ if not line.startswith('End cycle'):
+ continue
+ _, _, _, wall = line.split(',')
+ _, time = wall.split('=')
+ time = float(time.strip())
+ times.append(time)
+ results.append({
+ 'pe_count': pe_count,
+ 'average_wall_time': sum(times) / len(times),
+ })
+
+# The node counts
+x = [str(result['pe_count']) for result in results]
+# Average wall for cycle
+y = [result['average_wall_time'] for result in results]
+fig, ax = plt.subplots()
+ax.plot(x, y)
+ax.set_title('PENNANT Workflow Run')
+ax.set_xlabel('Node count')
+ax.set_ylabel('Average wall time for cycle')
+# Save to a png file
+fig.savefig('graph.png')
+
+# Ignore C0103: This is just a simple script, not all globals should be UPPER_CASE here
+# pylama:ignore=C0103
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/graph_pennant.sh b/beeflow/data/cwl/bee_workflows/pennant-build/graph_pennant.sh
new file mode 100644
index 000000000..762adf6bb
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/graph_pennant.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Wrapper to make sure the environment is set up
+
+. /venv/bin/activate
+python3 /graph_pennant.py "$@"
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/pennant.yml b/beeflow/data/cwl/bee_workflows/pennant-build/pennant.yml
new file mode 100644
index 000000000..3a5ec44ad
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/pennant.yml
@@ -0,0 +1 @@
+pnt: '/PENNANT/test/sedovbig/sedovbig.pnt'
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/pennant_1_node.cwl b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_1_node.cwl
new file mode 100644
index 000000000..d0d7c3ae4
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_1_node.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /PENNANT/build/pennant
+
+inputs:
+ pnt:
+ type: File
+ inputBinding: {}
+stdout: pennant_1_node.out
+outputs:
+ output:
+ type: stdout
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/pennant_2_node.cwl b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_2_node.cwl
new file mode 100644
index 000000000..d5646b578
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_2_node.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /PENNANT/build/pennant
+
+inputs:
+ pnt:
+ type: File
+ inputBinding: {}
+stdout: pennant_2_node.out
+outputs:
+ output:
+ type: stdout
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/pennant_4_node.cwl b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_4_node.cwl
new file mode 100644
index 000000000..56c8a5e7f
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_4_node.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /PENNANT/build/pennant
+
+inputs:
+ pnt:
+ type: File
+ inputBinding: {}
+stdout: pennant_4_node.out
+outputs:
+ output:
+ type: stdout
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/pennant_8_node.cwl b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_8_node.cwl
new file mode 100644
index 000000000..98dffc2f2
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_8_node.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /PENNANT/build/pennant
+
+inputs:
+ pnt:
+ type: File
+ inputBinding: {}
+stdout: pennant_8_node.out
+outputs:
+ output:
+ type: stdout
diff --git a/beeflow/data/cwl/bee_workflows/pennant-build/pennant_wf.cwl b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_wf.cwl
new file mode 100644
index 000000000..4327eb708
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant-build/pennant_wf.cwl
@@ -0,0 +1,85 @@
+cwlVersion: v1.2
+class: Workflow
+
+inputs:
+ pnt: File
+
+outputs:
+ output_1_node:
+ type: File
+ outputSource: 1_node/output
+ output_2_node:
+ type: File
+ outputSource: 2_node/output
+ output_4_node:
+ type: File
+ outputSource: 4_node/output
+ output_8_node:
+ type: File
+ outputSource: 8_node/output
+ image:
+ type: File
+ outputSource: graph/image
+
+steps:
+ 1_node:
+ run: pennant_1_node.cwl
+ in:
+ pnt: pnt
+ out: [output]
+ hints:
+ beeflow:MPIRequirement:
+ nodes: 1
+ DockerRequirement:
+ dockerFile: "Dockerfile.pennant-flux-x86_64"
+ beeflow:containerName: "pennant-flux"
+
+ 2_node:
+ run: pennant_2_node.cwl
+ in:
+ pnt: pnt
+ out: [output]
+ hints:
+ beeflow:MPIRequirement:
+ nodes: 2
+ DockerRequirement:
+ dockerFile: "Dockerfile.pennant-flux-x86_64"
+ beeflow:containerName: "pennant-flux"
+
+ 4_node:
+ run: pennant_4_node.cwl
+ in:
+ pnt: pnt
+ out: [output]
+ hints:
+ beeflow:MPIRequirement:
+ nodes: 4
+ DockerRequirement:
+ dockerFile: "Dockerfile.pennant-flux-x86_64"
+ beeflow:containerName: "pennant-flux"
+
+ 8_node:
+ run: pennant_8_node.cwl
+ in:
+ pnt: pnt
+ out: [output]
+ hints:
+ beeflow:MPIRequirement:
+ nodes: 8
+ DockerRequirement:
+ dockerFile: "Dockerfile.pennant-flux-x86_64"
+ beeflow:containerName: "pennant-flux"
+
+ graph:
+ run: graph.cwl
+ in:
+ out1node: 1_node/output
+ out2node: 2_node/output
+ out4node: 4_node/output
+ out8node: 8_node/output
+    out: [image]
+ hints:
+ DockerRequirement:
+ dockerFile: "Dockerfile.pennant-graph-x86_64"
+ beeflow:containerName: "pennant-graph"
+
diff --git a/beeflow/data/cwl/bee_workflows/pennant/graph.cwl b/beeflow/data/cwl/bee_workflows/pennant/graph.cwl
new file mode 100644
index 000000000..bc1276e12
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant/graph.cwl
@@ -0,0 +1,23 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /graph_pennant.sh
+
+inputs:
+ out1node:
+ type: File
+ inputBinding:
+ position: 1
+ out2node:
+ type: File
+ inputBinding:
+ position: 2
+ out4node:
+ type: File
+ inputBinding:
+ position: 3
+outputs:
+ image:
+ type: File
+ outputBinding:
+ glob: graph.png
diff --git a/beeflow/data/cwl/bee_workflows/pennant/pennant.yml b/beeflow/data/cwl/bee_workflows/pennant/pennant.yml
new file mode 100644
index 000000000..3a5ec44ad
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant/pennant.yml
@@ -0,0 +1 @@
+pnt: '/PENNANT/test/sedovbig/sedovbig.pnt'
diff --git a/beeflow/data/cwl/bee_workflows/pennant/pennant_1_node.cwl b/beeflow/data/cwl/bee_workflows/pennant/pennant_1_node.cwl
new file mode 100644
index 000000000..d0d7c3ae4
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant/pennant_1_node.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /PENNANT/build/pennant
+
+inputs:
+ pnt:
+ type: File
+ inputBinding: {}
+stdout: pennant_1_node.out
+outputs:
+ output:
+ type: stdout
diff --git a/beeflow/data/cwl/bee_workflows/pennant/pennant_2_node.cwl b/beeflow/data/cwl/bee_workflows/pennant/pennant_2_node.cwl
new file mode 100644
index 000000000..d5646b578
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant/pennant_2_node.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /PENNANT/build/pennant
+
+inputs:
+ pnt:
+ type: File
+ inputBinding: {}
+stdout: pennant_2_node.out
+outputs:
+ output:
+ type: stdout
diff --git a/beeflow/data/cwl/bee_workflows/pennant/pennant_4_node.cwl b/beeflow/data/cwl/bee_workflows/pennant/pennant_4_node.cwl
new file mode 100644
index 000000000..56c8a5e7f
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant/pennant_4_node.cwl
@@ -0,0 +1,13 @@
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: /PENNANT/build/pennant
+
+inputs:
+ pnt:
+ type: File
+ inputBinding: {}
+stdout: pennant_4_node.out
+outputs:
+ output:
+ type: stdout
diff --git a/beeflow/data/cwl/bee_workflows/pennant/pennant_wf.cwl b/beeflow/data/cwl/bee_workflows/pennant/pennant_wf.cwl
new file mode 100644
index 000000000..d0d2c5555
--- /dev/null
+++ b/beeflow/data/cwl/bee_workflows/pennant/pennant_wf.cwl
@@ -0,0 +1,61 @@
+cwlVersion: v1.2
+class: Workflow
+
+inputs:
+ pnt: File
+
+outputs:
+ output_1_node:
+ type: File
+ outputSource: 1_node/output
+ output_2_node:
+ type: File
+ outputSource: 2_node/output
+ output_4_node:
+ type: File
+ outputSource: 4_node/output
+ image:
+ type: File
+ outputSource: graph/image
+
+steps:
+ 1_node:
+ run: pennant_1_node.cwl
+ in:
+ pnt: pnt
+ out: [output]
+ hints:
+ beeflow:MPIRequirement:
+ nodes: 1
+ DockerRequirement:
+ beeflow:useContainer: "$HOME/img/pennant.tar.gz"
+ 2_node:
+ run: pennant_2_node.cwl
+ in:
+ pnt: pnt
+ out: [output]
+ hints:
+ beeflow:MPIRequirement:
+ nodes: 2
+ DockerRequirement:
+ beeflow:useContainer: "$HOME/img/pennant.tar.gz"
+ 4_node:
+ run: pennant_4_node.cwl
+ in:
+ pnt: pnt
+ out: [output]
+ hints:
+ beeflow:MPIRequirement:
+ nodes: 4
+ DockerRequirement:
+ beeflow:useContainer: "$HOME/img/pennant.tar.gz"
+ graph:
+ run: graph.cwl
+ in:
+ out1node: 1_node/output
+ out2node: 2_node/output
+ out4node: 4_node/output
+    out: [image]
+ hints:
+ DockerRequirement:
+ beeflow:useContainer: "$HOME/img/pennant-graph.tar.gz"
diff --git a/beeflow/data/dockerfiles/Dockerfile.comd-flux-x86_64 b/beeflow/data/dockerfiles/Dockerfile.comd-flux-x86_64
new file mode 100644
index 000000000..df0808b20
--- /dev/null
+++ b/beeflow/data/dockerfiles/Dockerfile.comd-flux-x86_64
@@ -0,0 +1,123 @@
+# Based on example from @qwofford
+FROM almalinux:8
+
+RUN dnf install -y 'dnf-command(config-manager)' epel-release \
+ && dnf config-manager --set-enabled powertools epel \
+ && dnf install -y \
+ gcc \
+ gcc-c++ \
+ gcc-gfortran \
+ make \
+ git \
+ binutils \
+ libtool \
+ pkgconf \
+ autoconf \
+ automake \
+ ibacm \
+ libevent-devel \
+ libibumad-devel \
+ libibverbs-devel \
+ librdmacm-devel \
+ libibverbs-utils \
+ rdma-core \
+ numactl-devel \
+ perl \
+ cmake \
+ bzip2 \
+ ca-certificates \
+ wget \
+ autoconf \
+ automake \
+ libtool \
+ make \
+ pkgconfig \
+ glibc-devel \
+ zeromq-devel \
+ czmq-devel \
+ libuuid-devel \
+ jansson-devel \
+ lz4-devel \
+ libarchive-devel \
+ hwloc-devel \
+ sqlite-devel \
+ lua \
+ lua-devel \
+ lua-posix \
+ python3-devel \
+ python3-sphinx \
+ python3-cffi \
+ python3-yaml \
+ python3-jsonschema \
+ aspell \
+ aspell-en \
+ valgrind-devel \
+ mpich-devel \
+ jq \
+ ncurses-devel \
+ && dnf clean all
+
+ARG NUMJOBS=8
+
+# Build flux-core
+ARG FLUX_CORE_VERSION=0.50.0
+ARG FLUX_CORE_PREFIX=/usr
+RUN git clone https://github.com/flux-framework/flux-core.git \
+ && cd flux-core \
+    && git checkout v${FLUX_CORE_VERSION} \
+ && ./autogen.sh \
+ && ./configure --prefix=${FLUX_CORE_PREFIX} \
+ && make -j${NUMJOBS} install \
+ && cd .. \
+ && rm -rf flux-core
+
+# Build UCX
+ARG UCX_VERSION=1.13.1
+ARG UCX_PREFIX=/usr/local
+RUN wget --no-check-certificate https://github.com/openucx/ucx/releases/download/v${UCX_VERSION}/ucx-${UCX_VERSION}.tar.gz \
+ && tar -xvf ucx-${UCX_VERSION}.tar.gz \
+ && cd ucx-${UCX_VERSION} \
+ && ./contrib/configure-release --prefix=${UCX_PREFIX} \
+ && make -j${NUMJOBS} install \
+ && cd ../ \
+ && rm -rf ucx-${UCX_VERSION}
+
+# Build PMI2 from Slurm
+ARG SLURM_VERSION=22.05.8
+ARG SLURM_PREFIX=/usr/local
+RUN wget --no-check-certificate https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 \
+ && tar -xvf slurm-${SLURM_VERSION}.tar.bz2 \
+ && cd slurm-${SLURM_VERSION} \
+ && ./configure --prefix=${SLURM_PREFIX} \
+ && cd contribs/pmi2 \
+ && make -j${NUMJOBS} install \
+ && cd ../../../ \
+ && rm -rf slurm-${SLURM_VERSION}
+
+# Now build openmpi
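+# (Assumption: configuring Open MPI with both Slurm PMI and Flux PMI support lets the same image launch MPI jobs under either scheduler)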
+ARG OMPI_VERSION=4.1.4
+ARG OMPI_PREFIX=/usr/local
+RUN wget --no-check-certificate https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OMPI_VERSION}.tar.bz2 \
+ && tar -xvf openmpi-${OMPI_VERSION}.tar.bz2 \
+ && cd openmpi-${OMPI_VERSION} \
+ && CFLAGS=-O3 \
+ CXXFLAGS=-O3 \
+ ./configure --prefix=${OMPI_PREFIX} \
+ --with-slurm \
+ --with-flux-pmi \
+ --with-pmi=${SLURM_PREFIX} \
+ --with-pmix \
+ --with-ucx=${UCX_PREFIX} \
+ --disable-pty-support \
+ --enable-mca-no-build=btl-openib,plm-slurm \
+ && make -j${NUMJOBS} install \
+ && cd ../ \
+ && rm -rf openmpi-${OMPI_VERSION}
+RUN ldconfig
+
+# Build CoMD (the MPI version)
+ARG REPO=https://github.com/ECP-copa/CoMD.git
+RUN git clone $REPO && \
+ cd CoMD && \
+ cp src-mpi/Makefile.vanilla src-mpi/Makefile && \
+ make -C src-mpi
diff --git a/beeflow/data/dockerfiles/Dockerfile.pennant-flux-x86_64 b/beeflow/data/dockerfiles/Dockerfile.pennant-flux-x86_64
new file mode 100644
index 000000000..182441fe1
--- /dev/null
+++ b/beeflow/data/dockerfiles/Dockerfile.pennant-flux-x86_64
@@ -0,0 +1,130 @@
+# Based on example from @qwofford
+FROM almalinux:8
+
+RUN dnf install -y 'dnf-command(config-manager)' epel-release \
+ && dnf config-manager --set-enabled powertools epel \
+ && dnf install -y \
+ gcc \
+ gcc-c++ \
+ gcc-gfortran \
+ make \
+ git \
+ binutils \
+ libtool \
+ pkgconf \
+ autoconf \
+ automake \
+ ibacm \
+ libevent-devel \
+ libibumad-devel \
+ libibverbs-devel \
+ librdmacm-devel \
+ libibverbs-utils \
+ rdma-core \
+ numactl-devel \
+ perl \
+ cmake \
+ bzip2 \
+ ca-certificates \
+ wget \
+ autoconf \
+ automake \
+ libtool \
+ make \
+ pkgconfig \
+ glibc-devel \
+ zeromq-devel \
+ czmq-devel \
+ libuuid-devel \
+ jansson-devel \
+ lz4-devel \
+ libarchive-devel \
+ hwloc-devel \
+ sqlite-devel \
+ lua \
+ lua-devel \
+ lua-posix \
+ python3-devel \
+ python3-sphinx \
+ python3-cffi \
+ python3-yaml \
+ python3-jsonschema \
+ aspell \
+ aspell-en \
+ valgrind-devel \
+ mpich-devel \
+ jq \
+ ncurses-devel \
+ && dnf clean all
+
+ARG NUMJOBS=8
+
+# Build flux-core
+ARG FLUX_CORE_VERSION=0.50.0
+ARG FLUX_CORE_PREFIX=/usr
+RUN git clone https://github.com/flux-framework/flux-core.git \
+ && cd flux-core \
+    && git checkout v${FLUX_CORE_VERSION} \
+ && ./autogen.sh \
+ && ./configure --prefix=${FLUX_CORE_PREFIX} \
+ && make -j${NUMJOBS} install \
+ && cd .. \
+ && rm -rf flux-core
+
+# Build UCX
+ARG UCX_VERSION=1.13.1
+ARG UCX_PREFIX=/usr/local
+RUN wget --no-check-certificate https://github.com/openucx/ucx/releases/download/v${UCX_VERSION}/ucx-${UCX_VERSION}.tar.gz \
+ && tar -xvf ucx-${UCX_VERSION}.tar.gz \
+ && cd ucx-${UCX_VERSION} \
+ && ./contrib/configure-release --prefix=${UCX_PREFIX} \
+ && make -j${NUMJOBS} install \
+ && cd ../ \
+ && rm -rf ucx-${UCX_VERSION}
+
+# Build PMI2 from Slurm
+ARG SLURM_VERSION=22.05.8
+ARG SLURM_PREFIX=/usr/local
+RUN wget --no-check-certificate https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 \
+ && tar -xvf slurm-${SLURM_VERSION}.tar.bz2 \
+ && cd slurm-${SLURM_VERSION} \
+ && ./configure --prefix=${SLURM_PREFIX} \
+ && cd contribs/pmi2 \
+ && make -j${NUMJOBS} install \
+ && cd ../../../ \
+ && rm -rf slurm-${SLURM_VERSION}
+
+# Now build openmpi
+ARG OMPI_VERSION=4.1.4
+ARG OMPI_PREFIX=/usr/local
+RUN wget --no-check-certificate https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OMPI_VERSION}.tar.bz2 \
+ && tar -xvf openmpi-${OMPI_VERSION}.tar.bz2 \
+ && cd openmpi-${OMPI_VERSION} \
+ && CFLAGS=-O3 \
+ CXXFLAGS=-O3 \
+ ./configure --prefix=${OMPI_PREFIX} \
+ --with-slurm \
+ --with-flux-pmi \
+ --with-pmi=${SLURM_PREFIX} \
+ --with-pmix \
+ --with-ucx=${UCX_PREFIX} \
+ --disable-pty-support \
+ --enable-mca-no-build=btl-openib,plm-slurm \
+ && make -j${NUMJOBS} install \
+ && cd ../ \
+ && rm -rf openmpi-${OMPI_VERSION}
+RUN ldconfig
+
+# Build CoMD (the MPI version)
+#ARG REPO=https://github.com/ECP-copa/CoMD.git
+#RUN git clone $REPO && \
+# cd CoMD && \
+# cp src-mpi/Makefile.vanilla src-mpi/Makefile && \
+# make -C src-mpi
+
+ARG REPO=https://github.com/lanl/PENNANT.git
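+# (Assumption: the sed edits below switch which commented-out compiler/flags block in PENNANT's Makefile is active, so the build uses the toolchain installed in this image)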
+RUN git clone $REPO && \
+ cd PENNANT && \
+ sed -i '16,19s/^#//' Makefile && \
+ sed -i '22,25s/^/#/' Makefile && \
+ make
diff --git a/beeflow/data/dockerfiles/pennant-graph/Dockerfile.pennant-graph-x86_64 b/beeflow/data/dockerfiles/pennant-graph/Dockerfile.pennant-graph-x86_64
new file mode 100644
index 000000000..a29ebb0f0
--- /dev/null
+++ b/beeflow/data/dockerfiles/pennant-graph/Dockerfile.pennant-graph-x86_64
@@ -0,0 +1,30 @@
+# Build a container with matplotlib for graphing
+#
+# `ch-image build --force -f Dockerfile.pennant-graph-x86_64 -t pennant-graph .`
+FROM almalinux:8
+
+RUN dnf update -y \
+ && dnf install -y \
+ gcc \
+ gcc-c++ \
+ binutils \
+ libtool \
+ autoconf \
+ automake \
+ cmake \
+ pkgconf \
+ bzip2-devel \
+ zlib-devel \
+ libjpeg-devel \
+ libpng-devel \
+ python3 \
+ python3-devel
+
+RUN python3 -m venv /venv \
+ && echo ". /venv/bin/activate" >> /etc/profile.d/venv.sh \
+ && . /venv/bin/activate \
+ && pip install matplotlib
+
+COPY graph_pennant.py graph_pennant.sh /
+
+RUN chmod 755 /graph_pennant.sh
diff --git a/beeflow/data/dockerfiles/pennant-graph/graph_pennant.py b/beeflow/data/dockerfiles/pennant-graph/graph_pennant.py
new file mode 100644
index 000000000..c1fc92a56
--- /dev/null
+++ b/beeflow/data/dockerfiles/pennant-graph/graph_pennant.py
@@ -0,0 +1,43 @@
+"""Graph the output of a PENNANT workflow."""
+import re
+import sys
+import matplotlib.pyplot as plt
+
+
+results = []
+for fname in sys.argv[1:]:
+ pe_count = 0
+ times = []
+ with open(fname, encoding='utf-8') as fp:
+ for line in fp:
+ # Check for the PE count
+ m_pe_count = re.match(r'Running on (\d+) MPI PE\(s\)', line)
+ if m_pe_count:
+ pe_count = int(m_pe_count.group(1))
+ continue
+            # Check for an End cycle line
+ if not line.startswith('End cycle'):
+ continue
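+            # The line is assumed to hold four comma-separated fields with the wall time last, e.g. "End cycle N, ..., ..., wall=<seconds>"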
+ _, _, _, wall = line.split(',')
+ _, time = wall.split('=')
+ time = float(time.strip())
+ times.append(time)
+ results.append({
+ 'pe_count': pe_count,
+ 'average_wall_time': sum(times) / len(times),
+ })
+
+# The PE counts (plotted as the node count on the x axis)
+x = [str(result['pe_count']) for result in results]
+# Average wall time per cycle
+y = [result['average_wall_time'] for result in results]
+fig, ax = plt.subplots()
+ax.plot(x, y)
+ax.set_title('PENNANT Workflow Run')
+ax.set_xlabel('Node count')
+ax.set_ylabel('Average wall time for cycle')
+# Save to a png file
+fig.savefig('graph.png')
+
+# Ignore C0103: This is just a simple script, not all globals should be UPPER_CASE here
+# pylama:ignore=C0103
diff --git a/beeflow/data/dockerfiles/pennant-graph/graph_pennant.sh b/beeflow/data/dockerfiles/pennant-graph/graph_pennant.sh
new file mode 100644
index 000000000..762adf6bb
--- /dev/null
+++ b/beeflow/data/dockerfiles/pennant-graph/graph_pennant.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Wrapper to make sure the environment is set up
+
+. /venv/bin/activate
+python3 /graph_pennant.py "$@"
diff --git a/beeflow/scheduler/scheduler.py b/beeflow/scheduler/scheduler.py
index d3fd10adb..0e64c390e 100644
--- a/beeflow/scheduler/scheduler.py
+++ b/beeflow/scheduler/scheduler.py
@@ -21,9 +21,6 @@
flask_app = Flask(__name__)
api = Api(flask_app)
-# We have to call bc.init() here due to how gunicorn works
-bc.init()
-
bee_workdir = bc.get('DEFAULT', 'bee_workdir')
db_path = bee_workdir + '/' + 'sched.db'
@@ -83,30 +80,23 @@ def load_config_values():
"""
# Set the default config values
conf = {
- 'log': None,
- 'alloc_logfile': None,
'algorithm': None,
'default_algorithm': None,
- 'workdir': None,
}
for key in conf:
conf[key] = bc.get('scheduler', key)
# Set some defaults
- if not conf['log']:
- conf['log'] = '/'.join([bee_workdir, 'logs', 'scheduler.log'])
- if not conf['workdir']:
- conf['workdir'] = os.path.join(bee_workdir, 'scheduler')
- if not conf['alloc_logfile']:
- conf['alloc_logfile'] = os.path.join(conf['workdir'],
- ALLOC_LOGFILE)
+ conf['log'] = '/'.join([bee_workdir, 'logs', 'scheduler.log'])
+ conf['workdir'] = os.path.join(bee_workdir, 'scheduler')
+ conf['alloc_logfile'] = os.path.join(conf['workdir'], ALLOC_LOGFILE)
conf = argparse.Namespace(**conf)
log.info('Config = [')
- log.info(f'\talloc_logfile = {conf.alloc_logfile}')
+ log.info(f'\talloc_logfile = {conf.alloc_logfile}') # noqa pylama is wrong here
log.info(f'\talgorithm = {conf.algorithm}')
log.info(f'\tdefault_algorithm = {conf.default_algorithm}')
- log.info(f'\tworkdir = {conf.workdir}')
+ log.info(f'\tworkdir = {conf.workdir}') # noqa
log.info(']')
return conf
@@ -122,7 +112,7 @@ def create_app():
# Create the scheduler workdir, if necessary
# sched_listen_port = wf_utils.get_open_port()
# wf_db.set_sched_port(sched_listen_port)
- os.makedirs(conf.workdir, exist_ok=True)
+ os.makedirs(conf.workdir, exist_ok=True) # noqa
return flask_app
# Ignore W0511: This allows us to have TODOs in the code
diff --git a/beeflow/task_manager.py b/beeflow/task_manager.py
index fef5a3330..41c2d72de 100755
--- a/beeflow/task_manager.py
+++ b/beeflow/task_manager.py
@@ -19,19 +19,14 @@
from apscheduler.schedulers.background import BackgroundScheduler
from beeflow.common.config_driver import BeeConfig as bc
-
-from beeflow.common.db.bdb import connect_db
-
-# This must be imported before calling other parts of BEE
-bc.init()
-
-
from beeflow.common import log as bee_logging
from beeflow.common.build_interfaces import build_main
from beeflow.common.worker_interface import WorkerInterface
from beeflow.common.connection import Connection
import beeflow.common.worker as worker_pkg
from beeflow.common.db import tm_db
+from beeflow.common.db.bdb import connect_db
+from beeflow.common import paths
log = bee_logging.setup(__name__)
@@ -63,7 +58,7 @@ def _resource(tag=""):
def _wfm_conn():
"""Get a new connection to the WFM."""
- return Connection(bc.get('workflow_manager', 'socket'))
+ return Connection(paths.wfm_socket())
def update_task_state(workflow_id, task_id, job_state, **kwargs):
@@ -318,7 +313,7 @@ def get_status():
# Special slurm arguments
if WLS == 'Slurm':
worker_kwargs['use_commands'] = bc.get('slurm', 'use_commands')
- worker_kwargs['slurm_socket'] = bc.get('slurm', 'slurmrestd_socket')
+ worker_kwargs['slurm_socket'] = paths.slurm_socket()
worker_kwargs['openapi_version'] = bc.get('slurm', 'openapi_version')
worker = WorkerInterface(worker_class, **worker_kwargs)
diff --git a/beeflow/tests/test_parser.py b/beeflow/tests/test_parser.py
index b338e6aa8..b6a10e0b6 100644
--- a/beeflow/tests/test_parser.py
+++ b/beeflow/tests/test_parser.py
@@ -4,8 +4,8 @@
from pathlib import Path
import unittest
from beeflow.common.parser import CwlParser
-from beeflow.common.wf_data import generate_workflow_id
-from beeflow.tests.mocks import MockWFI
+from beeflow.common.wf_data import (generate_workflow_id, Workflow, Task, Hint,
+ StepInput, StepOutput, InputParameter, OutputParameter)
REPO_PATH = Path(*Path(__file__).parts[:-3])
@@ -21,46 +21,167 @@ class TestParser(unittest.TestCase):
@classmethod
def setUpClass(cls):
- """Start the GDB, initialize the CWL parser, which connects to the GDB."""
- cls.wfi = MockWFI()
- cls.parser = CwlParser(cls.wfi)
+ """Initialize the CWL parser."""
+ cls.parser = CwlParser()
- @classmethod
- def tearDownClass(cls):
- """Stop the GDB."""
+ def test_parse_workflow_yaml(self):
+ """Test parsing of workflow with a YAML input job file."""
+ cwl_wf_file = find("examples/clamr-ffmpeg-build/clamr_wf.cwl")
+ cwl_job_yaml = find("examples/clamr-ffmpeg-build/clamr_job.yml")
+ workflow_id = generate_workflow_id()
- def tearDown(self):
- """Clear all data in the Neo4j database."""
- if self.wfi.workflow_initialized() and self.wfi.workflow_loaded():
- self.wfi.finalize_workflow()
+ workflow, tasks = self.parser.parse_workflow(workflow_id, cwl_wf_file, cwl_job_yaml)
- def test_parse_workflow(self):
- """Test parsing of workflow with an input job file."""
- cwl_wfi_file = find("examples/clamr-ffmpeg-build/clamr_wf.cwl")
- cwl_job_yaml = find("examples/clamr-ffmpeg-build/clamr_job.yml")
+ self.assertEqual(workflow, WORKFLOW_GOLD)
+ self.assertListEqual(tasks, TASKS_GOLD)
+ for task in tasks:
+ self.assertEqual(task.workflow_id, workflow_id)
+
+ def test_parse_workflow_json(self):
+ """Test parsing of workflow with a JSON input job file."""
+ cwl_wf_file = find("examples/clamr-ffmpeg-build/clamr_wf.cwl")
cwl_job_json = find("examples/clamr-ffmpeg-build/clamr_job.json")
workflow_id = generate_workflow_id()
- # Test workflow parsing with YAML input job file
- wfi = self.parser.parse_workflow(workflow_id, cwl_wfi_file, cwl_job_yaml)
- self.assertTrue(wfi.workflow_loaded())
-
- wfi.finalize_workflow()
- self.assertFalse(wfi.workflow_loaded())
+ workflow, tasks = self.parser.parse_workflow(workflow_id, cwl_wf_file, cwl_job_json)
- # Test workflow parsing with JSON input job file
- wfi = self.parser.parse_workflow(workflow_id, cwl_wfi_file, cwl_job_json)
- self.assertTrue(wfi.workflow_loaded())
+ self.assertEqual(workflow, WORKFLOW_GOLD)
+ self.assertListEqual(tasks, TASKS_GOLD)
+ for task in tasks:
+ self.assertEqual(task.workflow_id, workflow_id)
def test_parse_workflow_no_job(self):
"""Test parsing of a workflow without an input job file."""
- cwl_wfi_file = find("beeflow/tests/cf.cwl")
+ cwl_wf_file = find("beeflow/tests/cf.cwl")
workflow_id = generate_workflow_id()
- # cwl_wfi_file = "examples/clamr-ffmpeg-build/clamr_wf.cwl"
-
- # Test workflow parsing without input job file
- wfi = self.parser.parse_workflow(workflow_id, cwl_wfi_file)
- self.assertTrue(wfi.workflow_loaded())
+ # cwl_wf_file = "examples/clamr-ffmpeg-build/clamr_wf.cwl"
+
+ workflow, tasks = self.parser.parse_workflow(workflow_id, cwl_wf_file)
+
+ self.assertEqual(workflow, WORKFLOW_NOJOB_GOLD)
+ self.assertListEqual(tasks, TASKS_NOJOB_GOLD)
+ for task in tasks:
+ self.assertEqual(task.workflow_id, workflow_id)
+
+
+WORKFLOW_GOLD = Workflow(
+ name='clamr_wf',
+ hints=[],
+ requirements=[],
+ inputs={InputParameter(id='input_format', type='string', value='image2'),
+ InputParameter(id='time_steps', type='int', value=5000),
+ InputParameter(id='output_filename', type='string', value='CLAMR_movie.mp4'),
+ InputParameter(id='frame_size', type='string', value='800x800'),
+ InputParameter(id='frame_rate', type='int', value=12),
+ InputParameter(id='max_levels', type='int', value=3),
+ InputParameter(id='graphics_type', type='string', value='png'),
+ InputParameter(id='steps_between_outputs', type='int', value=10),
+ InputParameter(id='pixel_format', type='string', value='yuv420p'),
+ InputParameter(id='grid_resolution', type='int', value=32),
+ InputParameter(id='steps_between_graphics', type='int', value=25)},
+ outputs={OutputParameter(id='clamr_stdout', type='File', value=None,
+ source='clamr/clamr_stdout'),
+ OutputParameter(id='clamr_movie', type='File', value=None, source='ffmpeg/movie'),
+ OutputParameter(id='ffmpeg_stderr', type='File', value=None,
+ source='ffmpeg/ffmpeg_stderr'),
+ OutputParameter(id='clamr_time_log', type='File', value=None,
+ source='clamr/time_log')},
+ workflow_id=generate_workflow_id())
+
+
+TASKS_GOLD = [
+ Task(
+ name='clamr',
+ base_command='/CLAMR/clamr_cpuonly',
+ hints=[Hint(class_='DockerRequirement', params={'dockerFile': '# Dockerfile.clamr-ffmpeg\n# Developed on Chicoma @lanl\n# Patricia Grubel \n\nFROM debian:11\n\n\nRUN apt-get update && \\\n apt-get install -y wget gnupg git cmake ffmpeg g++ make openmpi-bin libopenmpi-dev libpng-dev libpng16-16 libpng-tools imagemagick libmagickwand-6.q16-6 libmagickwand-6.q16-dev\n\nRUN git clone https://github.com/lanl/CLAMR.git\nRUN cd CLAMR && cmake . && make clamr_cpuonly\n', 'beeflow:containerName': 'clamr-ffmpeg'})], # noqa
+ requirements=[],
+ inputs=[StepInput(id='graphic_steps', type='int', value=None, default=None,
+ source='steps_between_graphics', prefix='-g', position=None,
+ value_from=None),
+ StepInput(id='graphics_type', type='string', value=None, default=None,
+ source='graphics_type', prefix='-G', position=None, value_from=None),
+ StepInput(id='grid_res', type='int', value=None, default=None,
+ source='grid_resolution', prefix='-n', position=None, value_from=None),
+ StepInput(id='max_levels', type='int', value=None, default=None,
+ source='max_levels', prefix='-l', position=None, value_from=None),
+ StepInput(id='output_steps', type='int', value=None, default=None,
+ source='steps_between_outputs', prefix='-i', position=None,
+ value_from=None),
+ StepInput(id='time_steps', type='int', value=None, default=None,
+ source='time_steps', prefix='-t', position=None, value_from=None)],
+ outputs=[StepOutput(id='clamr/clamr_stdout', type='stdout', value=None,
+ glob='clamr_stdout.txt'),
+ StepOutput(id='clamr/outdir', type='Directory', value=None,
+ glob='graphics_output/graph%05d.png'),
+ StepOutput(id='clamr/time_log', type='File', value=None,
+ glob='total_execution_time.log')],
+ stdout='clamr_stdout.txt',
+ stderr=None,
+ workflow_id=WORKFLOW_GOLD.id
+ ),
+ Task(
+ name='ffmpeg',
+ base_command='ffmpeg -y',
+ hints=[Hint(class_='DockerRequirement', params={'dockerFile': '# Dockerfile.clamr-ffmpeg\n# Developed on Chicoma @lanl\n# Patricia Grubel \n\nFROM debian:11\n\n\nRUN apt-get update && \\\n apt-get install -y wget gnupg git cmake ffmpeg g++ make openmpi-bin libopenmpi-dev libpng-dev libpng16-16 libpng-tools imagemagick libmagickwand-6.q16-6 libmagickwand-6.q16-dev\n\nRUN git clone https://github.com/lanl/CLAMR.git\nRUN cd CLAMR && cmake . && make clamr_cpuonly\n', 'beeflow:containerName': 'clamr-ffmpeg'})], # noqa
+ requirements=[],
+ inputs=[StepInput(id='ffmpeg_input', type='Directory', value=None, default=None,
+ source='clamr/outdir', prefix='-i', position=2,
+ value_from='$("/graph%05d.png")'),
+ StepInput(id='frame_rate', type='int', value=None, default=None,
+ source='frame_rate', prefix='-r', position=3, value_from=None),
+ StepInput(id='frame_size', type='string', value=None, default=None,
+ source='frame_size', prefix='-s', position=4, value_from=None),
+ StepInput(id='input_format', type='string', value=None, default=None,
+ source='input_format', prefix='-f', position=1, value_from=None),
+ StepInput(id='output_file', type='string', value=None, default=None,
+ source='output_filename', prefix=None, position=6, value_from=None),
+ StepInput(id='pixel_format', type='string', value=None, default=None,
+ source='pixel_format', prefix='-pix_fmt', position=5, value_from=None)],
+ outputs=[StepOutput(id='ffmpeg/movie', type='File', value=None,
+ glob='$(inputs.output_file)'),
+ StepOutput(id='ffmpeg/ffmpeg_stderr', type='stderr', value=None,
+ glob='ffmpeg_stderr.txt')],
+ stdout=None,
+ stderr='ffmpeg_stderr.txt',
+ workflow_id=WORKFLOW_GOLD.id)
+]
+
+
+WORKFLOW_NOJOB_GOLD = Workflow(
+ name='cf',
+ hints=[],
+ requirements=[],
+ inputs={InputParameter(id='infile', type='File', value=None)},
+ outputs={OutputParameter(id='ffmpeg_movie', type='File', value=None, source='ffmpeg/outfile'),
+ OutputParameter(id='clamr_dir', type='File', value=None, source='clamr/outfile')},
+ workflow_id=generate_workflow_id())
+
+
+TASKS_NOJOB_GOLD = [
+ Task(
+ name='clamr',
+ base_command='/clamr/CLAMR-master/clamr_cpuonly -n 32 -l 3 -t 5000 -i 10 -g 25 -G png',
+ hints=[Hint(class_='DockerRequirement',
+ params={'dockerImageId': '/usr/projects/beedev/clamr/clamr-toss.tar.gz'})],
+ requirements=[],
+ inputs=[],
+ outputs=[StepOutput(id='clamr/outfile', type='stdout', value=None,
+ glob='graphics_output')],
+ stdout='graphics_output',
+ stderr=None,
+ workflow_id=WORKFLOW_NOJOB_GOLD.id),
+ Task(
+ name='ffmpeg',
+ base_command='ffmpeg -f image2 -i $HOME/graphics_output/graph%05d.png -r 12 -s 800x800 -pix_fmt yuv420p $HOME/CLAMR_movie.mp4', # noqa
+ hints=[],
+ requirements=[],
+ inputs=[],
+ outputs=[StepOutput(id='ffmpeg/outfile', type='stdout', value=None,
+ glob='CLAMR_movie.mp4')],
+ stdout='CLAMR_movie.mp4',
+ stderr=None,
+ workflow_id=WORKFLOW_NOJOB_GOLD.id)
+]
if __name__ == '__main__':
diff --git a/beeflow/tests/test_slurm_worker.py b/beeflow/tests/test_slurm_worker.py
index 0767442c4..9befa2eba 100644
--- a/beeflow/tests/test_slurm_worker.py
+++ b/beeflow/tests/test_slurm_worker.py
@@ -5,11 +5,6 @@
import os
import pytest
from beeflow.common.config_driver import BeeConfig as bc
-
-
-bc.init()
-
-
from beeflow.common.worker_interface import WorkerInterface
from beeflow.common.worker.worker import WorkerError
from beeflow.common.worker.slurm_worker import SlurmWorker
diff --git a/beeflow/tests/test_wf_interface.py b/beeflow/tests/test_wf_interface.py
index 9b0158530..40034a8eb 100644
--- a/beeflow/tests/test_wf_interface.py
+++ b/beeflow/tests/test_wf_interface.py
@@ -3,12 +3,8 @@
import unittest
-from beeflow.common.config_driver import BeeConfig as bc
-
-bc.init()
-
-from beeflow.common.wf_data import (Requirement, Hint, InputParameter, OutputParameter,
- StepInput, StepOutput, generate_workflow_id)
+from beeflow.common.wf_data import (Workflow, Task, Requirement, Hint, InputParameter,
+ OutputParameter, StepInput, StepOutput, generate_workflow_id)
from beeflow.common.wf_interface import WorkflowInterface
from beeflow.tests.mocks import MockGDBInterface
@@ -36,11 +32,13 @@ def test_initialize_workflow(self):
hints = [Hint("ResourceRequirement", {"ramMin": 1024}),
Hint("NetworkAccess", {"networkAccess": True})]
workflow_id = generate_workflow_id()
- workflow = self.wfi.initialize_workflow(
- workflow_id, "test_workflow",
+ workflow = Workflow(
+ "test_workflow", hints, requirements,
[InputParameter("test_input", "File", "input.txt")],
[OutputParameter("test_output", "File", "output.txt", "viz/output")],
- requirements, hints)
+ workflow_id)
+
+ self.wfi.initialize_workflow(workflow)
gdb_workflow, _ = self.wfi.get_workflow()
@@ -52,11 +50,13 @@ def test_initialize_workflow(self):
def test_execute_workflow(self):
"""Test workflow execution initialization (set initial tasks' states to 'READY')."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
- tasks = self._create_test_tasks()
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ workflow_id))
+ tasks = self._create_test_tasks(workflow_id)
self.wfi.execute_workflow()
self.assertEqual("READY", self.wfi.get_task_state(tasks[0]))
@@ -64,11 +64,13 @@ def test_execute_workflow(self):
def test_pause_workflow(self):
"""Test workflow execution pausing (set running tasks' states to 'PAUSED')."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
- self._create_test_tasks()
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ workflow_id))
+ self._create_test_tasks(workflow_id)
self.wfi.execute_workflow()
@@ -79,11 +81,13 @@ def test_pause_workflow(self):
def test_resume_workflow(self):
"""Test workflow execution resuming (set paused tasks' states to 'RUNNING')."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
- self._create_test_tasks()
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ workflow_id))
+ self._create_test_tasks(workflow_id)
self.wfi.execute_workflow()
self.wfi.pause_workflow()
@@ -94,11 +98,14 @@ def test_resume_workflow(self):
def test_reset_workflow(self):
"""Test workflow execution resetting (set all tasks to 'WAITING', delete metadata)."""
- workflow = self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ workflow = Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
- tasks = self._create_test_tasks()
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ workflow_id)
+ self.wfi.initialize_workflow(workflow)
+ tasks = self._create_test_tasks(workflow_id)
metadata = {"cluster": "fog", "crt": "charliecloud",
"container_md5": "67df538c1b6893f4276d10b2af34ccfe", "job_id": 1337}
@@ -126,10 +133,11 @@ def test_reset_workflow(self):
def test_finalize_workflow(self):
"""Test workflow deletion from the graph database."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ generate_workflow_id()))
self.wfi.finalize_workflow()
self.assertFalse(self.wfi.workflow_loaded())
@@ -148,19 +156,25 @@ def test_add_task(self):
stdout = "output.txt"
stderr = "output-err.txt"
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_input/test_task_done")])
- task = self.wfi.add_task(
+ [OutputParameter("test_output", "File", "output.txt", "test_input/test_task_done")],
+ workflow_id))
+
+ task = Task(
name=task_name,
base_command=base_command,
+ hints=hints,
+ requirements=requirements,
inputs=inputs,
outputs=outputs,
- requirements=requirements,
- hints=hints,
stdout=stdout,
- stderr=stderr)
+ stderr=stderr,
+ workflow_id=workflow_id)
+
+ self.wfi.add_task(task)
# Task object assertions
self.assertEqual(task_name, task.name)
@@ -199,19 +213,25 @@ def test_restart_task(self):
stderr = "output-err.txt"
test_checkpoint_file = "/backup0.crx"
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_input/test_task_done")])
- task = self.wfi.add_task(
+ [OutputParameter("test_output", "File", "output.txt", "test_input/test_task_done")],
+ workflow_id))
+
+ task = Task(
name=task_name,
base_command=base_command,
+ hints=hints,
+ requirements=requirements,
inputs=inputs,
outputs=outputs,
- requirements=requirements,
- hints=hints,
stdout=stdout,
- stderr=stderr)
+ stderr=stderr,
+ workflow_id=workflow_id)
+
+ self.wfi.add_task(task)
# Restart the task, should create a new Task
new_task = self.wfi.restart_task(task, test_checkpoint_file)
@@ -255,11 +275,13 @@ def test_restart_task(self):
def test_finalize_task(self):
"""Test finalization of completed tasks."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
- tasks = self._create_test_tasks()
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ workflow_id))
+ tasks = self._create_test_tasks(workflow_id)
self.wfi.execute_workflow()
self.wfi.set_task_output(tasks[0], "prep/prep_output", "prep_output.txt")
ready_tasks = self.wfi.finalize_task(tasks[0])
@@ -288,19 +310,25 @@ def test_get_task_by_id(self):
stdout = "output.txt"
stderr = "output-err.txt"
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/test_task_done")])
- task = self.wfi.add_task(
+ [OutputParameter("test_output", "File", "output.txt", "test_task/test_task_done")],
+ workflow_id))
+
+ task = Task(
name=task_name,
base_command=base_command,
+ hints=hints,
+ requirements=requirements,
inputs=inputs,
outputs=outputs,
- requirements=requirements,
- hints=hints,
stdout=stdout,
- stderr=stderr)
+ stderr=stderr,
+ workflow_id=workflow_id)
+
+ self.wfi.add_task(task)
self.assertEqual(task, self.wfi.get_task_by_id(task.id))
@@ -308,12 +336,13 @@ def test_get_workflow(self):
"""Test obtaining the workflow from the graph database."""
requirements = [Requirement("ResourceRequirement", {"ramMin": 1024})]
hints = [Hint("NetworkAccess", {"networkAccess": True})]
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", hints, requirements,
[InputParameter("test_input", "File", "input.txt")],
[OutputParameter("test_output", "File", "output.txt", "viz/output")],
- requirements, hints)
- tasks = self._create_test_tasks()
+ workflow_id))
+ tasks = self._create_test_tasks(workflow_id)
(workflow, wf_tasks) = self.wfi.get_workflow()
@@ -323,19 +352,22 @@ def test_get_workflow(self):
def test_get_workflow_outputs(self):
"""Test obtaining the outputs of a workflow."""
- workflow = self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow = Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/test_task_done")])
+ [OutputParameter("test_output", "File", "output.txt", "test_task/test_task_done")],
+ generate_workflow_id())
+ self.wfi.initialize_workflow(workflow)
self.assertListEqual(workflow.outputs, self.wfi.get_workflow_outputs())
def test_get_workflow_state(self):
"""Test obtaining the state of a workflow."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/test_task_done")])
+ [OutputParameter("test_output", "File", "output.txt", "test_task/test_task_done")],
+ generate_workflow_id()))
# Initialized workflow state should be 'SUBMITTED'
self.assertEqual("SUBMITTED", self.wfi.get_workflow_state())
@@ -347,10 +379,11 @@ def test_get_workflow_state(self):
def test_set_workflow_state(self):
"""Test setting the state of a workflow."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/test_task_done")])
+ [OutputParameter("test_output", "File", "output.txt", "test_task/test_task_done")],
+ generate_workflow_id()))
# Initialized workflow state should be 'SUBMITTED'
self.assertEqual("SUBMITTED", self.wfi.get_workflow_state())
@@ -362,11 +395,13 @@ def test_set_workflow_state(self):
def test_get_ready_tasks(self):
"""Test obtaining of ready workflow tasks."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
- tasks = self._create_test_tasks()
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ workflow_id))
+ tasks = self._create_test_tasks(workflow_id)
# Should be no ready tasks
self.assertListEqual([], self.wfi.get_ready_tasks())
@@ -379,11 +414,13 @@ def test_get_ready_tasks(self):
def test_get_dependent_tasks(self):
"""Test obtaining of dependent tasks."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
- tasks = self._create_test_tasks()
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ workflow_id))
+ tasks = self._create_test_tasks(workflow_id)
# Get dependent tasks of Data Prep
dependent_tasks = self.wfi.get_dependent_tasks(tasks[0])
@@ -392,32 +429,38 @@ def test_get_dependent_tasks(self):
def test_get_task_state(self):
"""Test obtaining of task state."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None, None,
None)],
- [StepOutput("test_task/output", "File", "output.txt", "output.txt")])
+ [StepOutput("test_task/output", "File", "output.txt", "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
# Should be WAITING because workflow not yet executed
self.assertEqual("WAITING", self.wfi.get_task_state(task))
def test_set_task_state(self):
"""Test the setting of task state."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None, None,
None)],
- [StepOutput("test_task/output", "File", "output.txt", "output.txt")])
+ [StepOutput("test_task/output", "File", "output.txt", "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
self.wfi.set_task_state(task, "RUNNING")
@@ -426,16 +469,19 @@ def test_set_task_state(self):
def test_get_task_metadata(self):
"""Test the obtaining of task metadata."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None, None,
None)],
- [StepOutput("test_task/output", "File", "output.txt", "output.txt")])
+ [StepOutput("test_task/output", "File", "output.txt", "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
metadata = {"cluster": "fog", "crt": "charliecloud",
"container_md5": "67df538c1b6893f4276d10b2af34ccfe", "job_id": 1337}
@@ -444,16 +490,19 @@ def test_get_task_metadata(self):
def test_set_task_metadata(self):
"""Test the setting of task metadata."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None, None,
None)],
- [StepOutput("test_task/output", "File", "output.txt", "output.txt")])
+ [StepOutput("test_task/output", "File", "output.txt", "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
metadata = {"cluster": "fog", "crt": "charliecloud",
"container_md5": "67df538c1b6893f4276d10b2af34ccfe", "job_id": 1337}
@@ -467,30 +516,36 @@ def test_set_task_metadata(self):
def test_get_task_input(self):
"""Test the obtaining of a task input."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None, None,
None)],
- [StepOutput("test_task/output", "File", "output.txt", "output.txt")])
+ [StepOutput("test_task/output", "File", "output.txt", "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
self.assertEqual(task.inputs[0], self.wfi.get_task_input(task, "test_input"))
def test_set_task_input(self):
"""Test the setting of a task input."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", None, "default.txt", "test_input", None, None, None)],
- [StepOutput("test_task/output", "File", "output.txt", "output.txt")])
+ [StepOutput("test_task/output", "File", "output.txt", "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
test_input = StepInput("test_input", "File", "input.txt", "default.txt", "test_input",
None, None, None)
@@ -499,31 +554,37 @@ def test_set_task_input(self):
def test_get_task_output(self):
"""Test the obtaining of a task output."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None, None,
None)],
- [StepOutput("test_task/output", "File", "output.txt", "output.txt")])
+ [StepOutput("test_task/output", "File", "output.txt", "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
self.assertEqual(task.outputs[0], self.wfi.get_task_output(task, "test_task/output"))
def test_set_task_output(self):
"""Test the setting of a task output."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None,
None, None)],
- [StepOutput("test_task/output", "File", None, "output.txt")])
+ [StepOutput("test_task/output", "File", None, "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
test_output = StepOutput("test_task/output", "File", "output.txt", "output.txt")
self.wfi.set_task_output(task, "test_task/output", "output.txt")
@@ -531,16 +592,19 @@ def test_set_task_output(self):
def test_evaluate_expression(self):
"""Test the evaluation of an input/output expression."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None, None,
'$("test_" + inputs.test_input)')],
- [StepOutput("test_task/output", "File", None, "$(inputs.test_input).bak")])
+ [StepOutput("test_task/output", "File", None, "$(inputs.test_input).bak")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
test_input = StepInput("test_input", "string", "test_input.txt", "default.txt",
"test_input", None, None, '$("test_" + inputs.test_input)')
@@ -553,17 +617,20 @@ def test_evaluate_expression(self):
def test_workflow_completed(self):
"""Test determining if a workflow has completed."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ workflow_id = generate_workflow_id()
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "test_task/output")])
- task = self.wfi.add_task(
- "test_task",
- "ls",
+ [OutputParameter("test_output", "File", "output.txt", "test_task/output")],
+ workflow_id))
+ task = Task(
+ "test_task", "ls", None, None,
[StepInput("test_input", "File", "input.txt", "default.txt", "test_input", None,
None, None)],
[StepOutput("test_task/output", "File", "output.txt",
- "output.txt")])
+ "output.txt")],
+ None, None, workflow_id)
+ self.wfi.add_task(task)
# Workflow not completed
self.assertFalse(self.wfi.workflow_completed())
@@ -576,29 +643,32 @@ def test_workflow_completed(self):
def test_workflow_initialized(self):
"""Test determining if a workflow is initialized."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ generate_workflow_id()))
# Workflow now initialized
self.assertTrue(self.wfi.workflow_initialized())
def test_workflow_loaded(self):
"""Test determining if a workflow is loaded."""
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ generate_workflow_id()))
self.wfi.finalize_workflow()
# Workflow not loaded
self.assertFalse(self.wfi.workflow_loaded())
- self.wfi.initialize_workflow(
- generate_workflow_id(), "test_workflow",
+ self.wfi.initialize_workflow(Workflow(
+ "test_workflow", None, None,
[InputParameter("test_input", "File", "input.txt")],
- [OutputParameter("test_output", "File", "output.txt", "viz/output")])
+ [OutputParameter("test_output", "File", "output.txt", "viz/output")],
+ generate_workflow_id()))
# Workflow now loaded
self.assertTrue(self.wfi.workflow_loaded())
@@ -612,47 +682,61 @@ def test_workflow_id(self):
self.assertEqual(self.wfi.workflow_id, self.wfi._workflow_id)
- def _create_test_tasks(self):
+ def _create_test_tasks(self, workflow_id):
"""Create test tasks to reduce redundancy."""
# Remember that add_task uploads the task to the database as well as returns a Task
tasks = [
- self.wfi.add_task(
- "data_prep", base_command=["ls", "-a", "-F"],
+ Task(
+ name="data_prep", base_command=["ls", "-a", "-F"],
+ hints=[Hint("ResourceRequirement", {"ramMax": 2048})],
+ requirements=[Requirement("NetworkAccess", {"networkAccess": True})],
inputs=[StepInput("test_input", "File", None, None, "test_input", "-l", None,
None)],
outputs=[StepOutput("prep/prep_output", "stdout", None, "prep_output.txt")],
+ stdout="prep_output.txt", stderr=None,
+ workflow_id=workflow_id),
+ Task(
+ name="compute0", base_command="touch",
+ hints=[Hint("ResourceRequirement", {"ramMax": 2048})],
requirements=[Requirement("NetworkAccess", {"networkAccess": True})],
- hints=[Hint("ResourceRequirement", {"ramMax": 2048})], stdout="prep_output.txt"),
- self.wfi.add_task(
- "compute0", base_command="touch",
inputs=[StepInput("input_data", "File", None, None, "prep/prep_output", None,
None, None)],
outputs=[StepOutput("compute0/output", "stdout", None, "output0.txt")],
+ stdout="output0.txt", stderr=None,
+ workflow_id=workflow_id),
+ Task(
+ name="compute1", base_command="find",
+ hints=[Hint("ResourceRequirement", {"ramMax": 2048})],
requirements=[Requirement("NetworkAccess", {"networkAccess": True})],
- hints=[Hint("ResourceRequirement", {"ramMax": 2048})], stdout="output0.txt"),
- self.wfi.add_task(
- "compute1", base_command="find",
inputs=[StepInput("input_data", "File", None, None, "prep/prep_output", None,
None, None)],
outputs=[StepOutput("compute1/output", "stdout", None, "output1.txt")],
+ stdout="output1.txt", stderr=None,
+ workflow_id=workflow_id),
+ Task(
+ name="compute2", base_command="grep",
+ hints=[Hint("ResourceRequirement", {"ramMax": 2048})],
requirements=[Requirement("NetworkAccess", {"networkAccess": True})],
- hints=[Hint("ResourceRequirement", {"ramMax": 2048})], stdout="output1.txt"),
- self.wfi.add_task(
- "compute2", base_command="grep",
inputs=[StepInput("input_data", "File", None, None, "prep/prep_output", None,
None, None)],
outputs=[StepOutput("compute2/output", "stdout", None, "output2.txt")],
+ stdout="output2.txt", stderr=None,
+ workflow_id=workflow_id),
+ Task(
+ name="visualization", base_command="python",
+ hints=[Hint("ResourceRequirement", {"ramMax": 2048})],
requirements=[Requirement("NetworkAccess", {"networkAccess": True})],
- hints=[Hint("ResourceRequirement", {"ramMax": 2048})], stdout="output2.txt"),
- self.wfi.add_task(
- "visualization", base_command="python",
inputs=[StepInput("input0", "File", None, None, "compute0/output", "-i", 1, None),
StepInput("input1", "File", None, None, "compute1/output", "-i", 2, None),
StepInput("input2", "File", None, None, "compute2/output", "-i", 3, None)],
outputs=[StepOutput("viz/output", "stdout", "viz_output.txt", "viz_output.txt")],
- requirements=[Requirement("NetworkAccess", {"networkAccess": True})],
- hints=[Hint("ResourceRequirement", {"ramMax": 2048})], stdout="viz_output.txt")
+ stdout="viz_output.txt", stderr=None,
+ workflow_id=workflow_id)
]
+
+ for task in tasks:
+ self.wfi.add_task(task)
+
return tasks
diff --git a/beeflow/tests/test_wf_manager.py b/beeflow/tests/test_wf_manager.py
index a864b5d17..7184a6073 100644
--- a/beeflow/tests/test_wf_manager.py
+++ b/beeflow/tests/test_wf_manager.py
@@ -4,9 +4,12 @@
import os
import pathlib
import pytest
+import jsonpickle
+
+from test_parser import WORKFLOW_GOLD, TASKS_GOLD
from beeflow.wf_manager.wf_manager import create_app
from beeflow.wf_manager.resources import wf_utils
-from beeflow.tests.mocks import MockWFI, MockCwlParser, MockGDBInterface
+from beeflow.tests.mocks import MockWFI, MockGDBInterface
from beeflow.common.config_driver import BeeConfig as bc
from beeflow.common.wf_interface import WorkflowInterface
@@ -69,7 +72,6 @@ def _resource(tag=""):
# WFList Tests
def test_submit_workflow(client, mocker, teardown_workflow, temp_db):
"""Test submitting a workflow."""
- mocker.patch('beeflow.wf_manager.resources.wf_list.CwlParser', new=MockCwlParser)
mocker.patch('beeflow.wf_manager.resources.wf_list.dep_manager.create_image',
return_value=True)
mocker.patch('beeflow.wf_manager.resources.wf_list.dep_manager.start_gdb', return_value=True)
@@ -85,12 +87,12 @@ def test_submit_workflow(client, mocker, teardown_workflow, temp_db):
tarball = script_path / 'clamr-wf.tgz'
with open(tarball, 'rb') as tarball_contents:
resp = client().post('/bee_wfm/v1/jobs/', data={
- 'wf_name': 'clamr',
+ 'wf_name': 'clamr'.encode(),
'wf_filename': tarball,
- 'main_cwl': 'clamr_wf.cwl',
- 'yaml': 'clamr_job.yml',
- 'workflow_archive': tarball_contents,
- 'workdir': '.'
+ 'workdir': '.',
+ 'workflow': jsonpickle.encode(WORKFLOW_GOLD),
+ 'tasks': jsonpickle.encode(TASKS_GOLD, warn=True),
+ 'workflow_archive': tarball_contents
})
# Remove task added during the test
@@ -99,7 +101,6 @@ def test_submit_workflow(client, mocker, teardown_workflow, temp_db):
def test_reexecute_workflow(client, mocker, teardown_workflow, temp_db):
"""Test reexecuting a workflow."""
- mocker.patch('beeflow.wf_manager.resources.wf_list.CwlParser', new=MockCwlParser)
mocker.patch('beeflow.wf_manager.resources.wf_list.dep_manager.create_image',
return_value=True)
mocker.patch('beeflow.wf_manager.resources.wf_list.dep_manager.start_gdb', return_value=True)
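The reworked test reflects the new submission contract: the client sends a jsonpickle-encoded Workflow object and task list instead of CWL file names. As a rough illustration only, here is a sketch of what such a multipart POST could look like from a plain HTTP caller; the base_url parameter is an assumption, and the real client talks to the workflow manager through beeflow's Connection rather than raw requests.

    import jsonpickle
    import requests

    def submit_workflow(base_url, wf_name, tarball_path, workflow, tasks, workdir='.'):
        """POST a pre-parsed workflow, its tasks, and the workflow archive."""
        with open(tarball_path, 'rb') as archive:
            return requests.post(
                f'{base_url}/bee_wfm/v1/jobs/',
                data={
                    'wf_name': wf_name,
                    'wf_filename': tarball_path,
                    'workdir': workdir,
                    # The Workflow and Task objects are serialized with jsonpickle
                    # so the server can rebuild them without re-parsing CWL.
                    'workflow': jsonpickle.encode(workflow),
                    'tasks': jsonpickle.encode(tasks, warn=True),
                },
                files={'workflow_archive': archive})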
diff --git a/beeflow/wf_manager/resources/wf_list.py b/beeflow/wf_manager/resources/wf_list.py
index d1b585aff..5d71484a0 100644
--- a/beeflow/wf_manager/resources/wf_list.py
+++ b/beeflow/wf_manager/resources/wf_list.py
@@ -5,7 +5,6 @@
import os
import subprocess
-import traceback
import jsonpickle
from flask import make_response, jsonify
@@ -14,7 +13,6 @@
from beeflow.common import log as bee_logging
# from beeflow.common.wf_profiler import WorkflowProfiler
-from beeflow.common.parser import CwlParser, CwlParseError
from beeflow.wf_manager.resources import wf_utils
from beeflow.wf_manager.common import dep_manager
@@ -26,17 +24,6 @@
log = bee_logging.setup(__name__)
-def parse_workflow(wfi, wf_id, workflow_dir, main_cwl, yaml_file):
- """Run the parser."""
- parser = CwlParser(wfi)
- cwl_path = os.path.join(workflow_dir, main_cwl)
- if yaml_file is not None:
- yaml_path = os.path.join(workflow_dir, yaml_file)
- parser.parse_workflow(wf_id, cwl_path, yaml_path)
- else:
- parser.parse_workflow(wf_id, cwl_path)
-
-
# def initialize_wf_profiler(wf_name):
# # Initialize the workflow profiling code
# bee_workdir = wf_utils.get_bee_workdir()
@@ -89,24 +76,25 @@ def post(self):
reqparser = reqparse.RequestParser()
reqparser.add_argument('wf_name', type=str, required=True,
location='form')
- reqparser.add_argument('main_cwl', type=str, required=True,
- location='form')
- reqparser.add_argument('yaml', type=str, required=False,
- location='form')
reqparser.add_argument('wf_filename', type=str, required=True,
location='form')
reqparser.add_argument('workdir', type=str, required=True,
location='form')
+ reqparser.add_argument('workflow', type=str, required=True,
+ location='form')
+ reqparser.add_argument('tasks', type=str, required=True,
+ location='form')
reqparser.add_argument('workflow_archive', type=FileStorage, required=False,
location='files')
data = reqparser.parse_args()
wf_tarball = data['workflow_archive']
wf_filename = data['wf_filename']
- main_cwl = data['main_cwl']
wf_name = data['wf_name']
wf_workdir = data['workdir']
- # None if not sent
- yaml_file = data['yaml']
+ workflow = jsonpickle.decode(data['workflow'])
+ # May have to decode the list and task objects separately
+ tasks = [jsonpickle.decode(task) if isinstance(task, str) else task
+ for task in jsonpickle.decode(data['tasks'])]
try:
dep_manager.create_image()
@@ -116,7 +104,7 @@ def post(self):
resp = make_response(jsonify(msg=crt_message, status='error'), 418)
return resp
- wf_id = wf_data.generate_workflow_id()
+ wf_id = workflow.id
wf_dir = extract_wf(wf_id, wf_filename, wf_tarball)
bolt_port = wf_utils.get_open_port()
http_port = wf_utils.get_open_port()
@@ -125,18 +113,14 @@ def post(self):
db.workflows.add_workflow(wf_id, wf_name, 'Pending', wf_dir, bolt_port, gdb_pid)
dep_manager.wait_gdb(log)
- try:
- wfi = wf_utils.get_workflow_interface(wf_id)
- parse_workflow(wfi, wf_id, wf_dir, main_cwl, yaml_file)
- except CwlParseError as err:
- traceback.print_exc()
- log.error('Failed to parse file')
- return make_response(jsonify(msg=f'Parser: {err.args[0]}', status='error'), 400)
+ wfi = wf_utils.get_workflow_interface(wf_id)
+ wfi.initialize_workflow(workflow)
+
# initialize_wf_profiler(wf_name)
wf_utils.create_wf_metadata(wf_id, wf_name)
- _, tasks = wfi.get_workflow()
for task in tasks:
+ wfi.add_task(task)
metadata = wfi.get_task_metadata(task)
metadata['workdir'] = wf_workdir
wfi.set_task_metadata(task, metadata)
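The list comprehension guarding the tasks field handles the situation hinted at in the comment above: if the sender encodes each Task individually and then encodes the resulting list, decoding the outer string yields plain strings that need a second decode. A small self-contained sketch with a throwaway class (not the real Task) shows both round-trips:

    import jsonpickle

    class Item:
        """Stand-in for a Task-like object."""
        def __init__(self, name):
            self.name = name

    items = [Item('a'), Item('b')]

    # Case 1: encode the list once -> decoding returns Item objects directly.
    once = jsonpickle.encode(items)
    assert all(isinstance(obj, Item) for obj in jsonpickle.decode(once))

    # Case 2: encode each element, then encode the list -> decoding the outer
    # string returns JSON strings that must each be decoded a second time.
    twice = jsonpickle.encode([jsonpickle.encode(obj) for obj in items])
    decoded = [jsonpickle.decode(obj) if isinstance(obj, str) else obj
               for obj in jsonpickle.decode(twice)]
    assert all(isinstance(obj, Item) for obj in decoded)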
diff --git a/beeflow/wf_manager/resources/wf_utils.py b/beeflow/wf_manager/resources/wf_utils.py
index b0a91fa8f..edcf21899 100644
--- a/beeflow/wf_manager/resources/wf_utils.py
+++ b/beeflow/wf_manager/resources/wf_utils.py
@@ -12,7 +12,7 @@
from beeflow.common.gdb.neo4j_driver import Neo4jDriver
from beeflow.common.wf_interface import WorkflowInterface
from beeflow.common.connection import Connection
-
+from beeflow.common import paths
from beeflow.common.db import wfm_db
from beeflow.common.db.bdb import connect_db
@@ -152,7 +152,7 @@ def tm_url():
def _connect_tm():
"""Return a connection to the TM."""
- return Connection(bc.get('task_manager', 'socket'))
+ return Connection(paths.tm_socket())
def sched_url():
@@ -166,7 +166,7 @@ def sched_url():
def _connect_scheduler():
"""Return a connection to the Scheduler."""
- return Connection(bc.get('scheduler', 'socket'))
+ return Connection(paths.sched_socket())
def _resource(component, tag=""):
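Replacing the bc.get(...) socket lookups with paths.tm_socket() and paths.sched_socket() centralizes where component sockets live. The sketch below is purely hypothetical and assumes the helpers join a runtime working directory with fixed socket names; the actual beeflow.common.paths module may compute these paths differently.

    import os

    def _workdir():
        """Root directory for runtime state (assumed; real value comes from the BEE config)."""
        return os.path.expanduser(os.getenv('BEE_WORKDIR', '~/.beeflow'))

    def tm_socket():
        """Path to the Task Manager's UNIX socket (assumed name)."""
        return os.path.join(_workdir(), 'task_manager.sock')

    def sched_socket():
        """Path to the Scheduler's UNIX socket (assumed name)."""
        return os.path.join(_workdir(), 'scheduler.sock')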
diff --git a/ci/README.md b/ci/README.md
index 804943650..028a6bd58 100644
--- a/ci/README.md
+++ b/ci/README.md
@@ -1,18 +1,23 @@
# CI code
This directory contains all the scripts that are needed for configuring and
-running BEE on a CI machine. The scripts here are as follows:
+running BEE on a CI machine. `BATCH_SCHEDULER` is set in the environment by the
+workflow to either `Slurm` or `Flux`, which is then used in various places in
+these scripts. The scripts are as follows:
* `env.sh`: CI environment set up
-* `bee_install.sh`: script for installing BEE
-* `bee_start.sh`: start script for BEE
-* `deps_install.sh`: BEE external dependency install script (this installs
- distro libs, as well as slurm)
-* `slurm_start.sh`: script for configuring and launching a single-machine slurm
- set up
-* `integration_test.sh`: external script for setting up the environment for the
- integration test
-* `integration_test.py`: actual Python integration testing code
+* `batch_scheduler.sh`: Install and set up a batch scheduler
+* `bee_install.sh`: Install BEE and python dependencies
+* `bee_config.sh`: Generate the bee.conf
+* `deps_install.sh`: Install external dependencies needed by BEE and batch schedulers
+* `flux_install.sh`: Install flux and dependencies
+* `inner_integration_test.sh`: Inner script for integration testing and running
+  with a specific batch scheduler
+* `integration_test.py`: The actual integration test script; can be run locally
+* `integration_test.sh`: Outer script for integration testing called from the
+ github workflow
+* `slurm_start.sh`: Start the Slurm batch scheduler
+* `unit_tests.sh`: Run the unit tests
Note: The only script that you should be able to run locally without problems is
`integration_test.py`. The rest are designed for the CI environment and will
@@ -20,6 +25,6 @@ likely not work on a local machine.
## Integration tests
-The integrations tests are written as a Python script `integration_test.py`.
+The integration tests are written as a Python script `integration_test.py`.
This test can be run locally after you've started BEE with `beeflow`, by just
launching the script `./ci/integration_test.py`.
diff --git a/ci/batch_scheduler.sh b/ci/batch_scheduler.sh
new file mode 100755
index 000000000..a89e4c275
--- /dev/null
+++ b/ci/batch_scheduler.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+# Set up and start the batch scheduler
+
+case $BATCH_SCHEDULER in
+Slurm)
+ ./ci/slurm_start.sh
+ ;;
+Flux)
+ ./ci/flux_install.sh
+ ;;
+*)
+ printf "ERROR: Invalid batch scheduler '%s'\n" "$BATCH_SCHEDULER" >&2
+ exit 1
+ ;;
+esac
diff --git a/ci/bee_config.sh b/ci/bee_config.sh
new file mode 100755
index 000000000..469378517
--- /dev/null
+++ b/ci/bee_config.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+# BEE Configuration
+
+. ./ci/env.sh
+
+mkdir -p ~/.config/beeflow
+cat >> ~/.config/beeflow/bee.conf <<EOF
+chmod +x install-poetry.sh
+./install-poetry.sh
# Do a poetry install, making sure that all extras are added
poetry install -E cloud_extras || exit 1
-
-# BEE Configuration
-mkdir -p ~/.config/beeflow
-cat >> ~/.config/beeflow/bee.conf <<EOF
-You could then start each workflow with the ``beeclient start <wf_id>`` command and query for the status of their tasks separately using ``beeclient query <wf_id>``.
+You could then start each workflow with the ``beeflow start <wf_id>`` command and query for the status of their tasks separately using ``beeflow query <wf_id>``.
diff --git a/docs/sphinx/commands.rst b/docs/sphinx/commands.rst
index eddaca23d..18cf0d8f5 100644
--- a/docs/sphinx/commands.rst
+++ b/docs/sphinx/commands.rst
@@ -1,34 +1,31 @@
-Commands
-************
+Command Line Interface
+**********************
-The main commands that you should be familiar with to use BEE are "**beeflow**" and "**beeclient**". Each of these have sub-commands to do various things with the client or daemon.
-The command "**beecfg**" is used to set up and validate your BEE configuration.
+BEE is controlled by one "**beeflow**" command with sub-commands to do various operations with the client or daemon.
-
-beeflow
+BEE Daemon
============
-**beeflow** is the command you will use to interact with the daemon process. The following are the options:
-
-``beeflow start``: Daemonize (if not in debug) and start all BEE components.
+To interact with the daemon process you'll need to use the ``beeflow core`` sub-command. The following are the options:
+``beeflow core start``: Daemonize (if not in debug) and start all BEE components.
Options:
-F, --foreground run in the foreground [default: False]
-``beeflow status``: Check the status of beeflow and the components.
+``beeflow core status``: Check the status of beeflow and the components.
-``beeflow stop``: Stop the current running beeflow daemon.
+``beeflow core stop``: Stop the current running beeflow daemon.
-``beeflow --version``: Display the version number of BEE.
+``beeflow core --version``: Display the version number of BEE.
-beeclient
-===========
+Submission and workflow commands
+================================
-**beeclient** is the command you will use to submit workflows and interact with your workflows. The following are the options:
+This section shows what commands you can use to submit and interact with your workflows. The following are the major options:
-``beeclient submit``: Submit a new workflow. By default this will also start
+``beeflow submit``: Submit a new workflow. By default this will also start
jobs immediately (unless passed the ``--no-start`` option). If either the MAIN_CWL or YAML
files are not contained immediately inside of WF_PATH, then the WF_PATH directory will
be copied into a temporary directory and the missing files will then be copied
@@ -42,62 +39,62 @@ Arguments:
- WORKDIR, working directory for workflow containing input + output files [required]
- ``--no-start``, don't start the workflow immediately
-``beeclient start``: Start a workflow with a workflow ID. Only needed if
-``beeclient submit`` was passed the ``--no-start`` option.
+``beeflow start``: Start a workflow with a workflow ID. Only needed if
+``beeflow submit`` was passed the ``--no-start`` option.
Arguments:
- WF_ID [required]
-``beeclient package``: Package a workflow into a tarball.
+``beeflow package``: Package a workflow into a tarball.
Arguments:
- WF_PATH, Path to the workflow package directory [required]
- PACKAGE_DEST, Path for where the packaged workflow should be saved [required]
-``beeclient listall``: List all workflows
+``beeflow list``: List all workflows
-``beeclient query``: Get the status of a workflow.
+``beeflow query``: Get the status of a workflow.
Arguments:
- WF_ID [required]
-``beeclient pause``: Pause a workflow (Running tasks will finish)
+``beeflow pause``: Pause a workflow (Running tasks will finish)
Arguments:
WF_ID [required]
-``beeclient resume``: Resume a paused workflow.
+``beeflow resume``: Resume a paused workflow.
Arguments:
WF_ID [required]
-``beeclient cancel``: Cancel a workflow.
+``beeflow cancel``: Cancel a workflow.
Arguments:
WF_ID [required]
-``beeclient copy``: Copy an archived workflow.
+``beeflow copy``: Copy an archived workflow.
Arguments:
WF_ID [required]
-``beeclient reexecute``: Reexecute an archived workflow.
+``beeflow reexecute``: Reexecute an archived workflow.
Arguments:
WF_ID [required]
-beecfg
-===========
+Generating and Managing Configuration Files
+===========================================
-**beecfg** is the command you will use to configure BEE for your workflows.
+You can use the ``beeflow config`` sub-command to configure BEE for your workflows. The following are further options for this sub-command:
-``beecfg validate``: Validate an existing configuration file.
+``beeflow config validate``: Validate an existing configuration file.
-``beecfg info``: Display some info about bee.conf's various options.
+``beeflow config info``: Display some info about bee.conf's various options.
-``beecfg new``: Create a new config file.
+``beeflow config new``: Create a new config file.
-``beecfg show``: Show the contents of current bee.conf.
+``beeflow config show``: Show the contents of current bee.conf.
diff --git a/docs/sphinx/examples.rst b/docs/sphinx/examples.rst
index dae729008..6c2d74c3a 100644
--- a/docs/sphinx/examples.rst
+++ b/docs/sphinx/examples.rst
@@ -71,8 +71,9 @@ sequence of commands:
.. code-block::
cd $WORKDIR_PATH
- beeclient package $BEE_PATH/examples/cat-grep-tar . # Tars up the workflow
- beeclient submit $NAME ./cat-grep-tar.tgz workflow.cwl input.yml $WORKDIR_PATH # Now submit the workflow
+ cp $BEE_PATH/examples/cat-grep-tar/lorem.txt .
+ beeflow package $BEE_PATH/examples/cat-grep-tar . # Tars up the workflow
+ beeflow submit $NAME ./cat-grep-tar.tgz workflow.cwl input.yml $WORKDIR_PATH # Now submit the workflow
This first command packages the workflow into a tarball, which makes it easy to
pass everything over to the Workflow Manager and finally submits the workflow,
@@ -88,15 +89,15 @@ the directory for the example by:
.. code-block::
cd $WORKDIR_PATH
- beeclient submit $NAME $BEE_PATH/examples/cat-grep-tar
- $BEE_PATH/examples/workflow.cwl $BEE_PATH/examples/input.yml
- $WORKDIR_PATH # Now submit the workflow
+ cp -r $BEE_PATH/examples/cat-grep-tar . # Copy example directory
+ cp cat-grep-tar/lorem.txt .
+ beeflow submit $NAME cat-grep-tar cat-grep-tar/workflow.cwl cat-grep-tar/input.yml $WORKDIR_PATH # Submits the workflow
This will automatically do the packaging and create an archive in the
background to be submitted.
Now the workflow should start up. While the workflow is running you can check
-the status by running a ``beeclient query $ID``. On completion, each step
+the status by running a ``beeflow query $ID``. On completion, each step
should be in a ``COMPLETED`` state. If you forgot to copy the lorem.txt file
to $WORKDIR_PATH the cat step will be in the ``FAILED`` state and the error will
be in the cat.err file.
@@ -123,7 +124,7 @@ of the ``occur*.txt`` files in a tarball. However, this is a useful sample of
the features a real-world workflow might need to use. For instance, the first
step might be producing some sort of output from a calculation, instead of just
copying the input to the output. The last step may also do some more processing
-to produce some sort of final file. If necessary, there can many more
+to produce some sort of final file. If necessary, there can be many more
processing steps than this simple example shows.
CLAMR workflow examples (containerized application)
@@ -147,7 +148,8 @@ CLAMR build workflow
--------------------
The workflow is in **/examples/clamr-ffmpeg-build**. You may want to explore the
-cwl files to understand the workflow specification for the example. Below is
+cwl files to understand the workflow specification for the example. The specification
+for building clamr in this example targets x86 hardware. Below is
the clamr step with the DockerRequirement in hints that specifies to build a
container from a dockerfile using Charliecloud (the container runtime specified
in the configuration file).
@@ -158,20 +160,20 @@ CWL for clamr step in examples/clamr-ffmpeg-build/clamr_wf.cwl
-Next we'll submit the CLAMR workflow from a directory of your choosing ($HOME)
+Next we'll submit the CLAMR workflow from a directory of your choosing,
+referred to as $WORKDIR_PATH,
on the same front-end where you started the components. If you have not started
the beeflow components, refer to :ref:`installation`.
In this example, instead of packaging up the workflow cwl files directory,
we've just listed the full path. This should auto-detect the directory and
-package it for you. Additionally, if the main_cwl and yaml files are not in
-the workflow directory, they will be copied into a temporary copy of the
-workflow directory before packaging. Compare this with the previous example.
-Other than the commands needed, this shouldn't affect the workflow in any way.
+package it for you.
.. code-block::
- beeclient submit clamr-example /examples/clamr-ffmpeg-build /clamr_wf.cwl /clamr_job.yml .
+ cd $WORKDIR_PATH
+ cp -r $BEE_PATH/examples/clamr-ffmpeg-build .
+ beeflow submit clamr-example clamr-ffmpeg-build clamr-ffmpeg-build/clamr_wf.cwl clamr-ffmpeg-build/clamr_job.yml $WORKDIR_PATH
Output:
@@ -190,7 +192,7 @@ pre-processing building phase and will only be performed once. In this example
both steps use the container that is built in the pre-processing stage. Once
the build has been completed the Charliecloud image will be in the container
archive location specified in the builder section of the bee configuration
-file. You can list contents of the configuration file using ``beecfg list``.
+file. You can list the contents of the configuration file using ``beeflow config show``.
The status of the workflow will progress to completion and can be queried as
shown:
@@ -200,7 +202,7 @@ Check the status:
.. code-block::
- beeclient query fce80d
+ beeflow query fce80d
Output:
@@ -214,7 +216,7 @@ As the clamr task goes from READY to RUNNING, let's check the status again:
.. code-block::
- beeclient query fce80d
+ beeflow query fce80d
Output:
@@ -228,7 +230,7 @@ When the workflow has completed:
.. code-block::
- beeclient query fce80d
+ beeflow query fce80d
Output:
@@ -240,7 +242,7 @@ Output:
The archived workflow with associated job outputs will be in the
**bee_workdir**. See the default section of your configuration file (to list
-configuration file contents run ``beecfg list``). This workflow also produces
+configuration file contents run ``beeflow config show``). This workflow also produces
output from CLAMR and ffmpeg in the directory where you submitted the workflow :
.. code-block::
diff --git a/docs/sphinx/installation.rst b/docs/sphinx/installation.rst
index a5782d2d7..44bb7d7c9 100644
--- a/docs/sphinx/installation.rst
+++ b/docs/sphinx/installation.rst
@@ -11,7 +11,7 @@ Requirements:
* **Python version 3.8 (or greater)**
* `Charliecloud `_ **version 0.32 (or greater)**
- Charliecloud is installed on Los Alamos National Laboratory (LANL) clusters and can be invoked using the `load module` command. Charliecloud is also easily installed in user space and requires no privilege to install. BEE runs dependencies from a Charliecloud container and uses Charliecloud to run the graph database neo4j and other dependencies. The default container runtime for containerized applications in BEE is Charliecloud.
+ Charliecloud is installed on Los Alamos National Laboratory (LANL) clusters and can be invoked via ``module load charliecloud`` before running beeflow. If you are on a system that does not have the module, `Charliecloud `_ is easily installed in user space and requires no privileges to install. To ensure Charliecloud is available in subsequent runs, add ``module load charliecloud`` (or, if you installed it yourself, ``export PATH=<Charliecloud bin directory>:$PATH``) to your .bashrc (or other appropriate shell initialization file). BEE runs dependencies from a Charliecloud container and uses it to run the graph database neo4j and other dependencies. The default container runtime for containerized applications in BEE is Charliecloud.
* **BEE dependency container**:
@@ -65,7 +65,7 @@ LANL systems you may use the BEE provided container:
**/usr/projects/BEE/neo4j-3-5-17-ch.tar.gz**). Depending on the system, you
may also need to know an account name to use.
-Once you are ready type ``beecfg new``.
+Once you are ready type ``beeflow config new``.
The bee.conf configuration file is a text file and you can edit it for your
needs.
@@ -74,8 +74,8 @@ needs.
systems have small quotas for home directories and containers can be large
files.**
-**beecfg** has other options including a configuration validator. For more
-information or help run: ``beecfg info`` or ``beecfg --help``.
+**beeflow config** has other options including a configuration validator. For more
+information or help run: ``beeflow config info`` or ``beeflow config --help``.
Starting up the BEE components:
-------------------------------
@@ -84,13 +84,13 @@ To start the components (scheduler, slurmrestd(SLURM only), workflow manager, an
.. code-block::
- beeflow start
+ beeflow core start
To check the status of the bee components run:
.. code-block::
- beeflow status
+ beeflow core status
.. code-block::
@@ -105,11 +105,11 @@ Some HPC systems have multiple front-ends. Run your workflows and components on
Stopping the BEE components:
-------------------------------
-If at some point you would like to stop the beeflow components, you should first verify that all workflows are complete (archived). (If there are pending workflows, it is also fine to stop the components because you can restart beeflow later and start pending workflows with the "beeclient start" command).
+If at some point you would like to stop the beeflow components, you should first verify that all workflows are complete (archived). (If there are pending workflows, it is also fine to stop the components because you can restart beeflow later and start pending workflows with the "beeflow start" command).
.. code-block::
- beeclient listall
+ beeflow list
.. code-block::
@@ -121,4 +121,4 @@ Now stop the components.
.. code-block::
- beeflow stop
+ beeflow core stop
diff --git a/docs/sphinx/visualization.rst b/docs/sphinx/visualization.rst
index 957803e22..cd408b419 100644
--- a/docs/sphinx/visualization.rst
+++ b/docs/sphinx/visualization.rst
@@ -32,7 +32,7 @@ Also take a look at the README in that directory if you need more information.
Running
=======
-From the frontend where BEE has been launched, you need to run ``beeclient
+From the frontend where BEE has been launched, you need to run ``beeflow
metadata ${WF_ID}``, where ``WF_ID`` is the ID of a submitted workflow. You can
safely ignore all the information displayed here, except for the ``bolt_port``
option which will be needed later.
diff --git a/docs/sphinx/wf_api.rst b/docs/sphinx/wf_api.rst
index 5ce762e34..2a2a3434b 100644
--- a/docs/sphinx/wf_api.rst
+++ b/docs/sphinx/wf_api.rst
@@ -3,12 +3,12 @@ BEEflow API
BEEflow
===========================
-.. automodule:: beeflow.cli
+.. automodule:: beeflow.client.bee_client
:members:
-BEEclient
+BEEflow Core
===========================
-.. automodule:: beeflow.client.bee_client
+.. automodule:: beeflow.client.core
:members:
Builder (Container)
diff --git a/pyproject.toml b/pyproject.toml
index e6d986e59..95c5f08b6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[tool.poetry]
name = "hpc-beeflow"
-version = "0.1.4"
-description = "A software package for containerizing HPC applications and managing job workflow"
+version = "0.1.5"
+description = "A software package for containerizing HPC applications and managing job workflows"
authors = [
@@ -43,10 +43,8 @@ packages = [
]
[tool.poetry.scripts]
-beeflow = 'beeflow.cli:main'
-beeclient = 'beeflow.client.bee_client:main'
+beeflow = 'beeflow.client.bee_client:main'
beecloud = 'beeflow.cloud_launcher:main'
-beecfg = 'beeflow.common.config_driver:main'
[tool.poetry.dependencies]
# Python version (>=3.8.3, <3.11)
@@ -56,7 +54,7 @@ python = ">=3.8.3,<=3.11"
Flask = { version = "^2.0" }
Jinja2 = { version = "<3.1" }
neo4j = { version = "^1.7.4" }
-PyYAML = { version = "^5.1.1" }
+PyYAML = { version = "^6.0.1" }
flask_restful = "0.3.9"
cwl-utils = "^0.16"
APScheduler = "^3.6.3"
@@ -69,6 +67,8 @@ gunicorn = "^20.1.0"
# typer version 0.6 and above seem to be throwing an AssertionError with no
# attached info
typer = "^0.5.0"
+# Seems to be required for Flux
+cffi = "^1.15.1"
# Cloud optional dependencies
google-api-python-client = { version = "^2.66.0", optional = true }