Skip to content

Commit

Permalink
[k8s] Fix GPU count detection from autoscaler YAML (#2636)
Browse files Browse the repository at this point in the history
* fix gpu count detection in yaml

* comments
  • Loading branch information
romilbhardwaj authored Nov 22, 2023
1 parent c0600bc commit 76da783
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
14 changes: 10 additions & 4 deletions sky/skylet/providers/kubernetes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,17 @@ def get_resource(container_resources: Dict[str, Any],
# float('inf') means there's no limit set
res_count = request if limit == float('inf') else limit
# Convert to int since Ray autoscaler expects int.
# Cap the minimum resource to 1 because if resource count is set to 0,
# (e.g., when request=0.5), ray will not be able to schedule any tasks.
# We also round up the resource count to the nearest integer to provide the
# user at least the amount of resource they requested.
return max(1, math.ceil(res_count))
rounded_count = math.ceil(res_count)
if resource_name == 'cpu':
# For CPU, we set minimum count to 1 because if CPU count is set to 0,
# (e.g. when the user sets --cpu 0.5), ray will not be able to schedule
# any tasks.
return max(1, rounded_count)
else:
# For GPU and memory, return the rounded count.
return rounded_count


def _get_resource(container_resources: Dict[str, Any], resource_name: str,
Expand All @@ -144,7 +150,7 @@ def _get_resource(container_resources: Dict[str, Any], resource_name: str,
Args:
container_resources: Container's resource field.
resource_name: One of 'cpu', 'gpu' or memory.
resource_name: One of 'cpu', 'gpu' or 'memory'.
field_name: One of 'requests' or 'limits'.
Returns:
Expand Down
1 change: 0 additions & 1 deletion tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -2964,7 +2964,6 @@ def test_skyserve_cancel():


# ------- Testing user ray cluster --------
@pytest.mark.no_kubernetes # Kubernetes does not support sky status -r yet.
def test_user_ray_cluster(generic_cloud: str):
name = _get_cluster_name()
test = Test(
Expand Down

0 comments on commit 76da783

Please sign in to comment.