Skip to content

Commit

Permalink
updated timeouts. updated tests
Browse files Browse the repository at this point in the history
  • Loading branch information
XaverStiensmeier committed Jul 4, 2024
1 parent ed45bd0 commit c186223
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 29 deletions.
2 changes: 1 addition & 1 deletion bibigrid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
#features: # list

# elastic_scheduling: # for large or slow clusters increasing these timeouts might be necessary to avoid failures
# SuspendTimeout: 30 # after SuspendTimeout seconds, slurm allows to power up the node again
# SuspendTimeout: 60 # after SuspendTimeout seconds, slurm allows the node to be powered up again
# ResumeTimeout: 1200 # if a node doesn't start in ResumeTimeout seconds, the start is considered failed.

#- [next configurations]
2 changes: 1 addition & 1 deletion bibigrid/core/utility/ansible_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"server_name": "bibigrid", "admin_password": "bibigrid"}
SLURM_CONF = {"db": "slurm", "db_user": "slurm", "db_password": "changeme",
"munge_key": id_generation.generate_munge_key(),
"elastic_scheduling": {"SuspendTime": 3600, "ResumeTimeout": 1200, "SuspendTimeout": 30,
"elastic_scheduling": {"SuspendTime": 3600, "ResumeTimeout": 1200, "SuspendTimeout": 60,
"TreeWidth": 128}}
CLOUD_SCHEDULING = {"sshTimeout": 5}

Expand Down
4 changes: 2 additions & 2 deletions documentation/markdown/features/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ For nearly all cases the default value is what you need. Default is `True`.
```yaml
elastic_scheduling:
SuspendTime: 1800
SuspendTimeout: 60
SuspendTimeout: 90
ResumeTimeout: 1800
```

Expand All @@ -173,7 +173,7 @@ slurmConf:
munge_key: # automatically generated via id_generation.generate_munge_key
elastic_scheduling:
SuspendTime: 900 # if a node is not used for SuspendTime seconds, it will shut down
SuspendTimeout: 30 # after SuspendTimeout seconds, slurm allows to power up the powered down node again
SuspendTimeout: 60 # after SuspendTimeout seconds, slurm allows the powered-down node to be powered up again
ResumeTimeout: 900 # if a node doesn't start in ResumeTimeout seconds, the start is considered failed. See https://slurm.schedmd.com/slurm.conf.html#OPT_ResumeProgram
TreeWidth: 128 # https://slurm.schedmd.com/slurm.conf.html#OPT_TreeWidth
```
Expand Down
2 changes: 1 addition & 1 deletion resources/defaults/slurm/slurm.j2
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ ResumeTimeout= {{ slurm_conf.elastic_scheduling.ResumeTimeout }}
SuspendProgram=/opt/slurm/terminate.sh
# Suspend time's default is 1 hour (3600 seconds)
SuspendTime= {{ slurm_conf.elastic_scheduling.SuspendTime }}
# SuspendTimeout's default is 30 seconds
# SuspendTimeout's default is 60 seconds
SuspendTimeout={{ slurm_conf.elastic_scheduling.SuspendTimeout }}
# Excludes {{ hostvars[groups.master.0].name }} from suspend
SuspendExcNodes={{ exclude_groups | join(',') }}
Expand Down
44 changes: 22 additions & 22 deletions tests/test_ansible_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,14 @@ def test_generate_common_configuration_false(self):
default_user = "ubuntu"
ssh_user = "test"
configuration = [{}]
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 4},
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 5},
'cluster_cidrs': cidrs, 'cluster_id': cluster_id, 'default_user': default_user,
'dns_server_list': ['8.8.8.8'], 'enable_ide': False, 'enable_nfs': False,
'enable_slurm': False, 'enable_zabbix': False, 'local_dns_lookup': False,
'local_fs': False, 'slurm': True,
'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm',
'elastic_scheduling': {'ResumeTimeout': 900, 'SuspendTime': 3600,
'TreeWidth': 128},
'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600,
'SuspendTimeout': 60, 'TreeWidth': 128},
'munge_key': 'TO_BE_FILLED'}, 'ssh_user': ssh_user,
'use_master_as_compute': True}
generated_common_configuration = ansible_configurator.generate_common_configuration_yaml(cidrs, configuration,
Expand All @@ -124,16 +124,16 @@ def test_generate_common_configuration_true(self):
ssh_user = "test"
configuration = [
{elem: "True" for elem in ["localFS", "localDNSlookup", "useMasterAsCompute", "slurm", "zabbix", "ide"]}]
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 4},
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 5},
'cluster_cidrs': cidrs, 'cluster_id': cluster_id, 'default_user': default_user,
'dns_server_list': ['8.8.8.8'], 'enable_ide': 'True', 'enable_nfs': False,
'enable_slurm': 'True', 'enable_zabbix': 'True',
'ide_conf': {'build': False, 'ide': False, 'port_end': 8383, 'port_start': 8181,
'workspace': '${HOME}'}, 'local_dns_lookup': 'True',
'local_fs': 'True', 'slurm': 'True',
'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm',
'elastic_scheduling': {'ResumeTimeout': 900, 'SuspendTime': 3600,
'TreeWidth': 128},
'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600,
'SuspendTimeout': 60, 'TreeWidth': 128},
'munge_key': 'TO_BE_FILLED'}, 'ssh_user': ssh_user,
'use_master_as_compute': 'True',
'zabbix_conf': {'admin_password': 'bibigrid', 'db': 'zabbix',
Expand All @@ -152,16 +152,16 @@ def test_generate_common_configuration_nfs_shares(self):
cluster_id = "21"
default_user = "ubuntu"
ssh_user = "test"
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 4},
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 5},
'cluster_cidrs': cidrs, 'cluster_id': cluster_id, 'default_user': default_user,
'dns_server_list': ['8.8.8.8'], 'enable_ide': False, 'enable_nfs': 'True',
'enable_slurm': False, 'enable_zabbix': False, 'ext_nfs_mounts': [],
'local_dns_lookup': False, 'local_fs': False,
'nfs_mounts': [{'dst': '//vil/mil', 'src': '//vil/mil'},
{'dst': '//vol/spool', 'src': '//vol/spool'}], 'slurm': True,
'nfs_mounts': [{'dst': '/vil/mil', 'src': '/vil/mil'},
{'dst': '/vol/spool', 'src': '/vol/spool'}], 'slurm': True,
'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm',
'elastic_scheduling': {'ResumeTimeout': 900, 'SuspendTime': 3600,
'TreeWidth': 128},
'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600,
'SuspendTimeout': 60, 'TreeWidth': 128},
'munge_key': 'TO_BE_FILLED'}, 'ssh_user': ssh_user,
'use_master_as_compute': True}
generated_common_configuration = ansible_configurator.generate_common_configuration_yaml(cidrs, configuration,
Expand All @@ -177,15 +177,15 @@ def test_generate_common_configuration_nfs(self):
cluster_id = "21"
default_user = "ubuntu"
ssh_user = "test"
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 4},
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 5},
'cluster_cidrs': cidrs, 'cluster_id': cluster_id, 'default_user': default_user,
'dns_server_list': ['8.8.8.8'], 'enable_ide': False, 'enable_nfs': 'True',
'enable_slurm': False, 'enable_zabbix': False, 'ext_nfs_mounts': [],
'local_dns_lookup': False, 'local_fs': False,
'nfs_mounts': [{'dst': '//vol/spool', 'src': '//vol/spool'}], 'slurm': True,
'nfs_mounts': [{'dst': '/vol/spool', 'src': '/vol/spool'}], 'slurm': True,
'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm',
'elastic_scheduling': {'ResumeTimeout': 900, 'SuspendTime': 3600,
'TreeWidth': 128},
'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600,
'SuspendTimeout': 60, 'TreeWidth': 128},
'munge_key': 'TO_BE_FILLED'}, 'ssh_user': ssh_user,
'use_master_as_compute': True}
generated_common_configuration = ansible_configurator.generate_common_configuration_yaml(cidrs, configuration,
Expand All @@ -201,16 +201,16 @@ def test_generate_common_configuration_ext_nfs_shares(self):
cluster_id = "21"
default_user = "ubuntu"
ssh_user = "test"
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 4},
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 5},
'cluster_cidrs': cidrs, 'cluster_id': cluster_id, 'default_user': default_user,
'dns_server_list': ['8.8.8.8'], 'enable_ide': False, 'enable_nfs': 'True',
'enable_slurm': False, 'enable_zabbix': False,
'ext_nfs_mounts': [{'dst': '/vil/mil', 'src': '/vil/mil'}],
'local_dns_lookup': False, 'local_fs': False,
'nfs_mounts': [{'dst': '//vol/spool', 'src': '//vol/spool'}], 'slurm': True,
'nfs_mounts': [{'dst': '/vol/spool', 'src': '/vol/spool'}], 'slurm': True,
'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm',
'elastic_scheduling': {'ResumeTimeout': 900, 'SuspendTime': 3600,
'TreeWidth': 128},
'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600,
'SuspendTimeout': 60, 'TreeWidth': 128},
'munge_key': 'YryJVnqgg24Ksf8zXQtbct3nuXrMSi9N'},
'ssh_user': ssh_user, 'use_master_as_compute': True}
generated_common_configuration = ansible_configurator.generate_common_configuration_yaml(cidrs, configuration,
Expand All @@ -226,16 +226,16 @@ def test_generate_common_configuration_ide(self):
cluster_id = "21"
default_user = "ubuntu"
ssh_user = "test"
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 4},
common_configuration_yaml = {'bibigrid_version': version.__version__, 'cloud_scheduling': {'sshTimeout': 5},
'cluster_cidrs': cidrs, 'cluster_id': cluster_id, 'default_user': default_user,
'dns_server_list': ['8.8.8.8'], 'enable_ide': 'Some1', 'enable_nfs': False,
'enable_slurm': False, 'enable_zabbix': False,
'ide_conf': {'build': False, 'ide': False, 'key1': 'Some2', 'port_end': 8383,
'port_start': 8181, 'workspace': '${HOME}'},
'local_dns_lookup': False, 'local_fs': False, 'slurm': True,
'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm',
'elastic_scheduling': {'ResumeTimeout': 900, 'SuspendTime': 3600,
'TreeWidth': 128},
'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600,
'SuspendTimeout': 60, 'TreeWidth': 128},
'munge_key': 'b7nks3Ur3kanyPAEBxfSC9ypfSHFnWJL'},
'ssh_user': ssh_user, 'use_master_as_compute': True}
generated_common_configuration = ansible_configurator.generate_common_configuration_yaml(cidrs, configuration,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def test_initialize_master(self, mock_execute_ssh):
'username': creator.ssh_user,
'commands': creator.ssh_add_public_key_commands + ssh_handler.ANSIBLE_SETUP,
'filepaths': [(create.KEY_FOLDER + creator.key_name, '.ssh/id_ecdsa')],
'gateway': {}, 'timeout': 4}
'gateway': {}, 'timeout': 5}
mock_execute_ssh.assert_called_with(ssh_data, startup.LOG)

def test_prepare_volumes_none(self):
Expand Down Expand Up @@ -211,7 +211,7 @@ def test_upload_playbooks(self, mock_execute_ssh, mock_ac_ssh, mock_configure_an
'username': creator.ssh_user,
'commands': [mock_ac_ssh()] + ssh_handler.ANSIBLE_START,
'filepaths': create.FILEPATHS,
'gateway': {}, 'timeout': 4}
'gateway': {}, 'timeout': 5}
mock_execute_ssh.assert_called_with(ssh_data=ssh_data, log=startup.LOG)

@patch.object(create.Create, "generate_keypair")
Expand Down

0 comments on commit c186223

Please sign in to comment.