Skip to content

Commit

Permalink
Fixed master running slurmd but not being listed in slurm.conf. Now set to drained.
Browse files Browse the repository at this point in the history
  • Loading branch information
XaverStiensmeier committed Jun 27, 2024
1 parent de145dc commit 02aab94
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
2 changes: 2 additions & 0 deletions bibigrid/core/utility/ansible_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def write_host_and_group_vars(configurations, providers, cluster_id, log): # py
"gateway_ip": configuration["private_v4"],
"cloud_identifier": configuration["cloud_identifier"],
"on_demand": worker.get("onDemand", True),
"state": "CLOUD",
"partitions": worker.get("partitions", []) + ["all", configuration["cloud_identifier"]]}

worker_features = worker.get("features", [])
Expand Down Expand Up @@ -138,6 +139,7 @@ def write_host_and_group_vars(configurations, providers, cluster_id, log): # py
"flavor": flavor_dict, "private_v4": configuration["private_v4"],
"cloud_identifier": configuration["cloud_identifier"], "volumes": configuration["volumes"],
"fallback_on_other_image": configuration.get("fallbackOnOtherImage", False),
"state": "UNKNOWN" if configuration.get("useMasterAsCompute", True) else "DRAINED",
"on_demand": False,
"partitions": master.get("partitions", []) + ["all", configuration["cloud_identifier"]]}
if configuration.get("wireguard_peer"):
Expand Down
7 changes: 4 additions & 3 deletions resources/defaults/slurm/slurm.j2
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,20 @@ SlurmdDebug=info
SlurmdLogFile=/var/log/slurm/slurmd.log

# COMPUTE NODES
# use_master_as_compute
{% set partitions = {} %}
{% set exclude_groups = [] %}
{% set master_or_empty = groups.master if use_master_as_compute else [] %}
{% set node_groups = [] %}
{% for node_name in master_or_empty + groups.workers %}
{% for node_name in groups.master + groups.workers %}
{% set node = hostvars[node_name] %}
{% if node.name not in node_groups %}
{% if not node.on_demand %}
{% set _ = exclude_groups.append(node.name) %}
{% endif %}
{% set _ = node_groups.append(node.name) %}
{% set mem = (node.flavor.ram // 1024) * 1000 %}
NodeName={{ node.name }} SocketsPerBoard={{ node.flavor.vcpus }} CoresPerSocket=1 RealMemory={{ mem - [mem // 2, 16000] | min }} State={{ 'CLOUD' if node.on_demand else 'UNKNOWN' }} {{"Features=" + (node.features | join(",")) if node.features is defined }}# {{ node.cloud_identifier }}
# {{ node }}
NodeName={{ node.name }} SocketsPerBoard={{ node.flavor.vcpus }} CoresPerSocket=1 RealMemory={{ mem - [mem // 2, 16000] | min }} State={{node.state }} {{"Features=" + (node.features | join(",")) if node.features is defined }}# {{ node.cloud_identifier }}
{% for partition in node.partitions %}
{% if partition not in partitions %}
{% set _ = partitions.update({partition: []}) %}
Expand Down

0 comments on commit 02aab94

Please sign in to comment.