Skip to content

Commit

Permalink
[develop] Add Gaea C5 to supported platforms (ufs-community#898)
Browse files Browse the repository at this point in the history
Modulefiles and other configuration files that complete porting the SRW to Gaea C5 system.

---------

Co-authored-by: Natalie Perlin <Natalie.Perlin@noaa.gov>
  • Loading branch information
natalie-perlin and Natalie Perlin authored Sep 27, 2023
1 parent 87dbf19 commit 0b1b070
Show file tree
Hide file tree
Showing 17 changed files with 156 additions and 13 deletions.
14 changes: 7 additions & 7 deletions .cicd/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ pipeline {
parameters {
// Allow job runner to filter based on platform
// Use the line below to enable all PW clusters
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use')
// Use the line below to enable the PW AWS cluster
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use')
choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'gaea', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use')
choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use')
// Allow job runner to filter based on compiler
choice(name: 'SRW_COMPILER_FILTER', choices: ['all', 'gnu', 'intel'], description: 'Specify the compiler(s) to use to build')
// Uncomment the following line to re-enable comprehensive tests
Expand Down Expand Up @@ -77,8 +77,8 @@ pipeline {
axes {
axis {
name 'SRW_PLATFORM'
// values 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'hercules'//, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'gaea', 'hera', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
// values 'cheyenne', 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'gaea', 'gaea-c5', 'hera', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand All @@ -92,7 +92,7 @@ pipeline {
exclude {
axis {
name 'SRW_PLATFORM'
values 'gaea', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'gaea', 'gaea-c5', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand Down
5 changes: 5 additions & 0 deletions .cicd/scripts/wrapper_srw_ftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ if [[ "${SRW_PLATFORM}" == gaea ]]; then
sed -i 's|qos=batch|qos=windfall|g' ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh
fi

if [[ "${SRW_PLATFORM}" == gaea-c5 ]]; then
sed -i '15i #SBATCH --clusters=c5' ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh
sed -i 's|qos=batch|qos=normal|g' ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh
fi

# Call job card and return job_id
echo "Running: ${workflow_cmd} -A ${SRW_PROJECT} ${arg_1} ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh"
job_id=$(${workflow_cmd} -A ${SRW_PROJECT} ${arg_1} ${WORKSPACE}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh)
Expand Down
2 changes: 1 addition & 1 deletion devbuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ set -eu
# automatically determine compiler
if [ -z "${COMPILER}" ] ; then
case ${PLATFORM} in
jet|hera|gaea) COMPILER=intel ;;
jet|hera|gaea|gaea-c5) COMPILER=intel ;;
orion) COMPILER=intel ;;
wcoss2) COMPILER=intel ;;
cheyenne) COMPILER=intel ;;
Expand Down
3 changes: 3 additions & 0 deletions etc/lmod-setup.csh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ else if ( "$L_MACHINE" == singularity ) then
else if ( "$L_MACHINE" == gaea ) then
source /lustre/f2/dev/role.epic/contrib/Lmod_init.csh

else if ( "$L_MACHINE" == gaea-c5 ) then
source /lustre/f2/dev/role.epic/contrib/Lmod_init_C5.csh

else if ( "$L_MACHINE" == derecho ) then
module reset

Expand Down
3 changes: 3 additions & 0 deletions etc/lmod-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ elif [ "$L_MACHINE" = singularity ]; then
elif [ "$L_MACHINE" = gaea ]; then
source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh

elif [ "$L_MACHINE" = gaea-c5 ]; then
source /lustre/f2/dev/role.epic/contrib/Lmod_init_C5.sh

elif [ "$L_MACHINE" = derecho ]; then
module reset

Expand Down
30 changes: 30 additions & 0 deletions modulefiles/build_gaea-c5_intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
help([[
This module loads libraries for building the UFS SRW App on
the NOAA RDHPC machine Gaea C5 using Intel-2023.1.0
]])

whatis([===[Loads libraries needed for building the UFS SRW App on Gaea C5 ]===])

load(pathJoin("cmake", os.getenv("cmake_ver") or "3.23.1"))

prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/hpc-stack/intel-classic-2023.1.0/modulefiles/stack")
load(pathJoin("hpc", os.getenv("hpc_ver") or "1.2.0"))
load(pathJoin("intel-classic", os.getenv("intel_classic_ver") or "2023.1.0"))
load(pathJoin("cray-mpich", os.getenv("cray_mpich_ver") or "8.1.25"))
load(pathJoin("hpc-intel-classic", os.getenv("hpc_intel_classic_ver") or "2023.1.0"))
load(pathJoin("hpc-cray-mpich", os.getenv("hpc_cray_mpich_ver") or "8.1.25"))

load("srw_common")

unload("darshan-runtime/3.4.0")
setenv("CFLAGS","-diag-disable=10441")
setenv("FFLAGS","-diag-disable=10441")

setenv("CC","cc")
setenv("FC","ftn")
setenv("CXX","CC")
setenv("CMAKE_C_COMPILER","cc")
setenv("CMAKE_Fortran_COMPILER","ftn")
setenv("CMAKE_CXX_COMPILER","CC")
setenv("CMAKE_Platform","gaea-c5.intel")

4 changes: 4 additions & 0 deletions modulefiles/tasks/gaea-c5/plot_allvars.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

setenv("SRW_ENV", "regional_workflow")
5 changes: 5 additions & 0 deletions modulefiles/tasks/gaea-c5/python_srw.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
unload("miniconda3")
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

setenv("SRW_ENV", "workflow_tools")
6 changes: 6 additions & 0 deletions modulefiles/tasks/gaea-c5/run_vx.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--[[
Compiler-specific modules are used for met and metplus libraries
--]]
load(pathJoin("met", os.getenv("met_ver") or "10.1.2"))
load(pathJoin("metplus", os.getenv("metplus_ver") or "4.1.3"))
load("python_srw")
21 changes: 21 additions & 0 deletions modulefiles/wflow_gaea-c5.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
help([[
This module loads python environement for running the UFS SRW App on
the NOAA RDHPC machine Gaea C5
]])

whatis([===[Loads libraries needed for running the UFS SRW App on gaea ]===])

unload("python")
load("set_pythonpath")
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/C5/rocoto/modulefiles")
load("rocoto")

pushenv("MKLROOT", "/opt/intel/oneapi/mkl/2023.1.0/")

if mode() == "load" then
LmodMsgRaw([===[Please do the following to activate conda:
> conda activate workflow_tools
]===])
end
11 changes: 11 additions & 0 deletions tests/WE2E/machine_suites/coverage.gaea-c5
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
community
custom_ESGgrid_NewZealand_3km
grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_RAP
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR
grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km
grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_HRRR
grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
grid_SUBCONUS_Ind_3km_ics_RAP_lbcs_RAP_suite_RRFS_v1beta_plot
nco_ensemble
nco_grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km
2 changes: 1 addition & 1 deletion tests/WE2E/setup_WE2E_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function usage {

}

machines=( hera jet cheyenne derecho orion wcoss2 gaea odin singularity macos noaacloud )
machines=( hera jet cheyenne derecho orion wcoss2 gaea gaea-c5 odin singularity macos noaacloud )

if [ "$1" = "-h" ] ; then usage ; fi
[[ $# -le 2 ]] && usage
Expand Down
4 changes: 2 additions & 2 deletions tests/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ function usage() {
echo
exit 1
}

machines=( hera jet cheyenne derecho orion hercules wcoss2 gaea odin singularity macos noaacloud )
machines=( hera jet cheyenne derecho orion hercules wcoss2 gaea gaea-c5 odin singularity macos noaacloud )

[[ $# -gt 4 ]] && usage

Expand Down
1 change: 1 addition & 0 deletions ush/load_modules_run_task.sh
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ module list

if [ -n "${SRW_ENV:-}" ] ; then
set +u
conda deactivate
conda activate ${SRW_ENV}
set -u
fi
Expand Down
54 changes: 54 additions & 0 deletions ush/machine/gaea-c5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
platform:
WORKFLOW_MANAGER: rocoto
NCORES_PER_NODE: 128
SCHED: slurm
TEST_CCPA_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/ccpa/proc
TEST_MRMS_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/mrms/proc
TEST_NDAS_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/ndas/proc
DOMAIN_PREGEN_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen
QUEUE_DEFAULT: normal
QUEUE_FCST: normal
QUEUE_HPSS: normal
REMOVE_MEMORY: True
PARTITION_HPSS: eslogin_c5
RUN_CMD_FCST: srun --export=ALL -n ${PE_MEMBER01}
RUN_CMD_POST: srun --export=ALL -n $nprocs
RUN_CMD_PRDGEN: srun --export=ALL -n $nprocs
RUN_CMD_SERIAL: time
RUN_CMD_UTILS: srun --export=ALL -n $nprocs
SCHED_NATIVE_CMD: --clusters=c5 --partition=batch --export=NONE
SCHED_NATIVE_CMD_HPSS: --clusters=es --partition=eslogin_c5 --export=NONE
PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }'
TEST_EXTRN_MDL_SOURCE_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data
TEST_PREGEN_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen
TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir
TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir
TEST_VX_FCST_INPUT_BASEDIR: '{{ "/lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/output_data/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}'
FIXaer: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_aer
FIXgsi: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_gsi
FIXgsm: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_am
FIXlut: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_lut
FIXorg: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_orog
FIXsfc: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo
FIXshp: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/NaturalEarth
EXTRN_MDL_DATA_STORES: aws
data:
ics_lbcs:
FV3GFS:
nemsio: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/nemsio/${yyyymmdd}${hh}
grib2: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh}
netcdf: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/netcdf/${yyyymmdd}${hh}
RAP: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh}
HRRR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/HRRR/${yyyymmdd}${hh}
RAP: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh}
GSMGFS: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/GSMGFS/${yyyymmdd}${hh}
rocoto:
tasks:
metatask_run_ensemble:
task_run_fcst_mem#mem#:
cores: '{{ task_run_fcst.PE_MEMBER01 // 1 }}'
native: '--cpus-per-task {{ task_run_fcst.OMP_NUM_THREADS_RUN_FCST|int }} --exclusive {{ platform.SCHED_NATIVE_CMD }}'
nodes:
nnodes:
nodesize:
ppn:
2 changes: 1 addition & 1 deletion ush/retrieve_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def download_file(url):
# -c continue previous attempt
# -T timeout seconds
# -t number of tries
cmd = f"wget -q -c -T 10 -t 2 {url}"
cmd = f"wget -q -c -T 15 -t 2 {url}"
logging.debug(f"Running command: \n {cmd}")
try:
subprocess.run(
Expand Down
2 changes: 1 addition & 1 deletion ush/valid_param_vals.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
valid_vals_RUN_ENVIR: ["nco", "community"]
valid_vals_VERBOSE: [True, False]
valid_vals_DEBUG: [True, False]
valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "HERCULES", "JET", "ODIN", "CHEYENNE", "DERECHO", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA"]
valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "HERCULES", "JET", "ODIN", "CHEYENNE", "DERECHO", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA", "GAEA-C5"]
valid_vals_SCHED: ["slurm", "pbspro", "lsf", "lsfcray", "none"]
valid_vals_FCST_MODEL: ["ufs-weather-model"]
valid_vals_WORKFLOW_MANAGER: ["rocoto", "ecflow", "none"]
Expand Down

0 comments on commit 0b1b070

Please sign in to comment.