Skip to content

Commit

Permalink
Add crontab capability (ufs-community#110)
Browse files Browse the repository at this point in the history
* add rocoto launch script

* modify crontab py script

* add new crontab line

* fix typo

* fix cron add func

* fix module load issue on crontab

* add log file to crontab command

* add modules to launch script

* update modules for orion

* change hpc list in pr template
  • Loading branch information
chan-hoo authored Jun 10, 2024
1 parent 1bfa037 commit ba8f0c9
Show file tree
Hide file tree
Showing 4 changed files with 356 additions and 9 deletions.
5 changes: 2 additions & 3 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,8 @@ EXAMPLE: Closes ufs-community/land-DA/issues/<issue_number>
- RDHPCS
- [ ] Hera
- [ ] Orion
- [ ] Jet
- [ ] Gaea
- [ ] Cheyenne
- [ ] Hercules
- [ ] Derecho
- CI
- [ ] Completed
- PW-Clouds
Expand Down
7 changes: 1 addition & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,8 @@ parm/*.out
parm/*.err
parm/*.log
parm/land_analysis.yaml
parm/log.rocoto_launch

__pycache__
*.swp

cycle.log
err_noahmp.*
log_noahmp.*
*.namelist
exp_out/*
analdates.sh
244 changes: 244 additions & 0 deletions parm/get_crontab_contents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
#!/usr/bin/env python3

import os
import sys
import argparse
import subprocess
from textwrap import dedent, indent
from logging import getLogger
from datetime import datetime

def get_crontab_contents(machine, debug):
    """
    Read the current user's cron table by running ``crontab -l``.

    Both the listing command and the table it returned are echoed to stdout
    between separator rules. Before the table is handed back, every single
    quote in it is converted to a double quote.

    Args:
        machine (str) : The name of the current machine (not used by this function)
        debug   (bool): Verbosity flag (not used by this function)
    Returns:
        crontab_cmd      (str) : The command used to manipulate the cron table ("crontab")
        crontab_contents (str) : The cron table text, single quotes replaced by double quotes
    """
    rule = "========================================================="
    crontab_cmd = "crontab"
    list_cmd = f"{crontab_cmd} -l"

    print("\n".join(["", "Getting crontab content with command:", rule, list_cmd, rule]))

    # Only the captured standard output of the listing command is of interest
    crontab_contents = run_command(list_cmd)[1]

    # A "no crontab for <user>" notice means the table is empty
    if crontab_contents.startswith('no crontab for'):
        crontab_contents = ''

    print("\n".join(["", "Crontab contents:", rule, crontab_contents, rule]))

    # Single quotes (hopefully only in comments) become double quotes
    return crontab_cmd, crontab_contents.replace("'", '"')


def add_crontab_line(machine, crontab_line, debug):
    """Append ``crontab_line`` to the user's cron table unless it is already active.

    The current table is fetched with ``get_crontab_contents``. Lines whose
    first non-blank character is ``#`` are ignored when checking whether
    ``crontab_line`` is already present, so a commented-out copy does not
    prevent the line from being added.

    The new table is written to the crontab command's standard input rather
    than being pasted into a quoted shell string, so a ``crontab_line`` that
    contains quote characters cannot break (or inject into) the command.

    Args:
        machine      (str) : The name of the current machine (passed through to
                             ``get_crontab_contents``)
        crontab_line (str) : The cron entry to add
        debug        (bool): True prints the "Adding ..." message via ``log_info``
    Returns:
        None
    """

    # Get crontab contents
    crontab_cmd, crontab_contents = get_crontab_contents(machine, debug)

    # Need to omit commented crontab entries for the membership test below
    active_lines = [
        line
        for line in crontab_contents.split("\n")
        if not line.lstrip().startswith("#")
    ]
    crontab_no_comments = "\n".join(active_lines)

    if crontab_line in crontab_no_comments:
        log_info(
            f"""
The following line already exists in the cron table and thus will not be
added:
crontab_line = '{crontab_line}'"""
        )
        return

    log_info(
        f"""
Adding the following line to the user's cron table in order to automatically
resubmit SRW workflow:
crontab_line = '{crontab_line}'""",
        verbose=debug,
    )

    # Separate the existing table from the new entry if it lacks a final newline
    separator = "\n" if crontab_contents and not crontab_contents.endswith("\n") else ""
    new_contents = f"{crontab_contents}{separator}{crontab_line}\n"

    # Install the new table by piping it to the crontab command's stdin
    proc = subprocess.run(
        crontab_cmd,
        input=new_contents,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
        universal_newlines=True,
    )

    # Report a failed installation instead of silently ignoring it
    if proc.returncode != 0:
        print(
            f"\nWARNING: could not update the cron table "
            f"(exit code {proc.returncode}):\n {proc.stderr.rstrip()}\n"
        )


def delete_crontab_line(machine, crontab_line, debug):
    """Remove ``crontab_line`` from the user's cron table.

    Intended for use once a job has finished, i.e. its status is
    SUCCESS or FAILURE rather than IN PROGRESS.

    The table is printed before and after the edit. The (possibly unchanged)
    table is written back through the crontab command in either case; when the
    line is not present a warning is printed first.
    """
    rule = "========================================================="

    # Fetch the full cron table together with the command that manages it
    crontab_cmd, table = get_crontab_contents(machine, debug)

    print("\n".join(["", "Crontab contents before delete:", rule, table, rule]))

    if crontab_line not in table:
        print(f"\nWARNING: line not found in crontab, nothing to remove:\n {crontab_line}\n")
    else:
        # Strip the entry together with its newline first, then any bare occurrence
        for target in (crontab_line + "\n", crontab_line):
            table = table.replace(target, "")

    # Record the result back into the user's cron table
    run_command(f"""echo '{table}' | {crontab_cmd}""")

    print("\n".join(["", "Crontab contents after delete:", rule, table, rule]))


def parse_args(argv):
    """Build the command-line parser and parse ``argv``.

    The script is driven from a shell script, hence the CLI. Recognised
    options are ``--debug``, ``--add``, ``--remove``, ``--line`` and the
    mandatory ``--machine``. When neither ``--add`` nor ``--remove`` is
    given the caller simply prints the crontab contents.

    Args:
        argv: list of argument strings (without the program name)
    Returns:
        The parsed ``argparse.Namespace``
    Raises:
        argparse.ArgumentTypeError: ``--add``/``--remove`` given without ``--line``
    """
    parser = argparse.ArgumentParser(description="Crontab job manipulation program.")

    # Boolean switches, registered in the order they should appear in --help
    switches = (
        ("-d", "--debug", "Print debug output"),
        ("-a", "--add", "Add specified crontab line."),
        ("-r", "--remove", "Remove specified crontab line."),
    )
    for short_opt, long_opt, help_text in switches:
        parser.add_argument(short_opt, long_opt, action="store_true", help=help_text)

    # Options that carry a value
    parser.add_argument(
        "-l",
        "--line",
        help="Line to remove from crontab. If --remove/add not specified, has no effect",
    )
    parser.add_argument("-m", "--machine", help="Machine name", required=True)

    args = parser.parse_args(argv)

    # Adding or removing makes no sense without the line to act on
    needs_line = args.remove or args.add
    if needs_line and args.line is None:
        raise argparse.ArgumentTypeError("--line is a required argument if --remove/add is specified")

    return args

def run_command(cmd):
    """Execute ``cmd`` through the shell and capture its output.

    Args:
        cmd: command string to execute
    Returns:
        Tuple of (exit code, std_out, std_err); trailing newlines are
        removed from both output strings
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )

    # Drop the trailing newline characters from each captured stream
    std_out = completed.stdout.rstrip("\n")
    std_err = completed.stderr.rstrip("\n")
    return (completed.returncode, std_out, std_err)

def log_info(info_msg, verbose=True, dedent_=True):
    """Emit an INFO-level message on a logger named after the calling function.

    Python logging must already be initialized for the message to appear.

    Args:
        info_msg : info message to print
        verbose  : set to False to silence printing
        dedent_  : set to False to disable "dedenting" (print string as-is)
    Returns:
        None
    """
    # One frame up from here is the function that called log_info
    caller_name = sys._getframe().f_back.f_code.co_name
    logger = getLogger(caller_name)

    if not verbose:
        return

    text = indent(dedent(info_msg), " ") if dedent_ else info_msg
    logger.info(text)

if __name__ == "__main__":
    # Local import: only ``getLogger`` is imported from logging at file level.
    import logging

    # log_info() emits through the logging module; unless logging is
    # configured its INFO messages are dropped. Route them to stdout so they
    # appear alongside the print() output of the other functions.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")

    args = parse_args(sys.argv[1:])
    if args.remove:
        delete_crontab_line(args.machine, args.line, args.debug)
    elif args.add:
        add_crontab_line(args.machine, args.line, args.debug)
    else:
        # Neither --add nor --remove: just show the cron table.
        # (The previous call to an undefined print_info_msg() raised NameError.)
        _, out = get_crontab_contents(args.machine, args.debug)
        print(out)
109 changes: 109 additions & 0 deletions parm/launch_rocoto_wflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/bin/bash -l
#
# Advance the rocoto workflow by one step and record its status.
#
# Usage: launch_rocoto_wflow.sh [add]
#   add : additionally register this script in the user's crontab
#         (see CRONTAB_LINE below).
#
# Each invocation runs rocotorun once, writes the rocotostat output and a
# status summary to ${WFLOW_LOG_FN}, and removes the crontab entry once the
# workflow status is SUCCESS or FAILURE.

# Set shell options.
set -u

# Set path
PARMdir=$(cd "$(dirname "$(readlink -f -n "${BASH_SOURCE[0]}" )" )" && pwd -P)
source "${PARMdir}/detect_platform.sh"

# Load rocoto
if [ "${MACHINE}" == "hera" ]; then
  module load rocoto
elif [ "${MACHINE}" == "orion" ]; then
  source "${PARMdir}/../versions/run.ver_${MACHINE}"
  module use $modulepath_spack_stack
  module load stack-intel/$stack_intel_ver
  module load stack-python/$stack_python_ver
  module load contrib
  module load rocoto
else
  echo "FATAL ERROR: modules are not loaded"
fi

# Set file names.
WFLOW_XML_FN="land_analysis.xml"
rocoto_xml_bn=$( basename "${WFLOW_XML_FN}" ".xml" )
rocoto_database_fn="${rocoto_xml_bn}.db"
WFLOW_LOG_FN="log.rocoto_launch"

# Initialize the default status of the workflow to "IN PROGRESS".
wflow_status="IN PROGRESS"

# crontab line
CRONTAB_LINE="*/2 * * * * cd ${PARMdir} && ./launch_rocoto_wflow.sh >> ${WFLOW_LOG_FN}"

if [ "$#" -eq 1 ] && [ "$1" == "add" ]; then
  msg="The crontab line is added:
CRONTAB_LINE = \"${CRONTAB_LINE}\"
"

  "${PARMdir}/get_crontab_contents.py" --add -m="${MACHINE}" -l="${CRONTAB_LINE}" -d
  printf "%s" "$msg"
fi

# The file names above are relative, so work from the parm directory.
cd "${PARMdir}"
rocotorun -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}"

rocotostat_output=$( rocotostat -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" )

# Any DEAD task marks the whole workflow as failed.
# The here-string is quoted so the output is read line by line on every
# bash version (older releases word-split an unquoted here-string).
while read -r line; do
  if echo "$line" | grep -q "DEAD"; then
    wflow_status="FAILURE"
    break
  fi
done <<< "${rocotostat_output}"

# Print out rocotostat (this starts the log file afresh)
printf "%s" "${rocotostat_output}" > "${WFLOW_LOG_FN}"

# rocotostat with -s for cycle info
rocotostat_s_output=$( rocotostat -w "${WFLOW_XML_FN}" -d "${rocoto_database_fn}" -s )

# Capture group 1 is the leading run of digits, group 2 the following word.
regex_search="^[ ]*([0-9]+)[ ]+([A-Za-z]+)[ ]+.*"
cycle_str=()
cycle_status=()
i=0
while read -r line; do
  # Skip the first line of the output.
  if [ $i -gt 0 ]; then
    im1=$((i-1))
    cycle_str[im1]=$( echo "$line" | sed -r -n -e "s/${regex_search}/\1/p" )
    cycle_status[im1]=$( echo "$line" | sed -r -n -e "s/${regex_search}/\2/p" )
  fi
  i=$((i+1))
done <<< "${rocotostat_s_output}"

# Get the number of cycles
num_cycles_total=${#cycle_str[@]}
num_cycles_completed=0
for (( i=0; i<=$((num_cycles_total-1)); i++ )); do
  if [ "${cycle_status[i]}" = "Done" ]; then
    num_cycles_completed=$((num_cycles_completed+1))
  fi
done

# Check whether all cycles are complete
if [ ${num_cycles_completed} -eq ${num_cycles_total} ]; then
  wflow_status="SUCCESS"
fi

# Print out result
printf "%s" "
Summary of workflow status:
=====================================================
${num_cycles_completed} out of ${num_cycles_total} cycles completed.
Workflow status: ${wflow_status}
=====================================================
" >> "${WFLOW_LOG_FN}"

# Remove crontab line
if [ "${wflow_status}" = "SUCCESS" ] || [ "${wflow_status}" = "FAILURE" ]; then
  msg="The crontab line is removed:
CRONTAB_LINE = \"${CRONTAB_LINE}\" "

  "${PARMdir}/get_crontab_contents.py" --remove -m="${MACHINE}" -l="${CRONTAB_LINE}" -d

  printf "%s" "$msg" >> "${WFLOW_LOG_FN}"
fi

0 comments on commit ba8f0c9

Please sign in to comment.