[k8s] On-demand single-host TPU support on GKE #3947
base: master
Changes from 56 commits
@@ -84,16 +84,16 @@ def list_accelerators_realtime(
     ) or not kubernetes_utils.check_credentials(context)[0]:
         return {}, {}, {}
 
-    has_gpu = kubernetes_utils.detect_gpu_resource(context)
+    has_gpu = kubernetes_utils.detect_accelerator_resource(context)
     if not has_gpu:
         return {}, {}, {}
 
-    label_formatter, _ = kubernetes_utils.detect_gpu_label_formatter(context)
-    if not label_formatter:
+    lf, _ = kubernetes_utils.detect_gpu_label_formatter(context)
+    if not lf:
         return {}, {}, {}
 
     accelerators_qtys: Set[Tuple[str, int]] = set()
-    key = label_formatter.get_label_key()
+    keys = lf.get_label_keys()
     nodes = kubernetes_utils.get_kubernetes_nodes(context)
     # Get the pods to get the real-time GPU usage
     pods = kubernetes_utils.get_all_pods_in_kubernetes_cluster(context)
@@ -104,56 +104,64 @@ def list_accelerators_realtime(
     min_quantity_filter = quantity_filter if quantity_filter else 1
 
     for node in nodes:
-        if key in node.metadata.labels:
-            allocated_qty = 0
-            accelerator_name = label_formatter.get_accelerator_from_label_value(
-                node.metadata.labels.get(key))
-
-            # Check if name_filter regex matches the accelerator_name
-            regex_flags = 0 if case_sensitive else re.IGNORECASE
-            if name_filter and not re.match(
-                    name_filter, accelerator_name, flags=regex_flags):
-                continue
-
-            accelerator_count = int(
-                node.status.allocatable.get('nvidia.com/gpu', 0))
-
-            # Generate the GPU quantities for the accelerators
-            if accelerator_name and accelerator_count > 0:
-                for count in range(1, accelerator_count + 1):
-                    accelerators_qtys.add((accelerator_name, count))
-
-            for pod in pods:
-                # Get all the pods running on the node
-                if (pod.spec.node_name == node.metadata.name and
-                        pod.status.phase in ['Running', 'Pending']):
-                    # Iterate over all the containers in the pod and sum the
-                    # GPU requests
-                    for container in pod.spec.containers:
-                        if container.resources.requests:
-                            allocated_qty += int(
-                                container.resources.requests.get(
-                                    'nvidia.com/gpu', 0))
-
-            accelerators_available = accelerator_count - allocated_qty
-
-            if accelerator_count >= min_quantity_filter:
-                quantized_count = (min_quantity_filter *
-                                   (accelerator_count // min_quantity_filter))
-                if accelerator_name not in total_accelerators_capacity:
-                    total_accelerators_capacity[
-                        accelerator_name] = quantized_count
-                else:
-                    total_accelerators_capacity[
-                        accelerator_name] += quantized_count
-
-            if accelerator_name not in total_accelerators_available:
-                total_accelerators_available[accelerator_name] = 0
-            if accelerators_available >= min_quantity_filter:
-                quantized_availability = min_quantity_filter * (
-                    accelerators_available // min_quantity_filter)
-                total_accelerators_available[
-                    accelerator_name] += quantized_availability
+        for key in keys:
+            if key in node.metadata.labels:
+                allocated_qty = 0
+                accelerator_name = lf.get_accelerator_from_label_value(
+                    node.metadata.labels.get(key))
+
+                # Exclude multi-host TPUs from being processed.
+                # TODO(Doyoung): Remove the logic when adding support for
+                # multi-host TPUs.
+                if kubernetes_utils.is_multi_host_tpu(node.metadata.labels):
+                    continue
+
+                # Check if name_filter regex matches the accelerator_name
+                regex_flags = 0 if case_sensitive else re.IGNORECASE
+                if name_filter and not re.match(
+                        name_filter, accelerator_name, flags=regex_flags):
+                    continue
+
+                # Generate the GPU quantities for the accelerators
+                accelerator_count = (
+                    kubernetes_utils.get_node_accelerator_count(
+                        node.status.allocatable))
+                if accelerator_name and accelerator_count > 0:
+                    for count in range(1, accelerator_count + 1):
+                        accelerators_qtys.add((accelerator_name, count))

Review comment: Maybe a quick way of addressing the show-gpus issue is to change this logic to show only the exact count, not the range, if the accelerator is a TPU:

Reply: @cblmemo @romilbhardwaj fixed at cbce4d5
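
A rough sketch of that suggestion, reusing the variables from the hunk above; the TPU check (matching GKE TPU accelerator names by a 'tpu-' prefix) is assumed here for illustration, and the actual fix in cbce4d5 may be implemented differently:

    # Sketch: advertise only the exact chip count for TPU nodes, since a
    # single-host TPU slice is requested as a whole, while keeping the
    # 1..count range for GPUs.
    if accelerator_name and accelerator_count > 0:
        if accelerator_name.startswith('tpu-'):  # Assumed TPU name check.
            accelerators_qtys.add((accelerator_name, accelerator_count))
        else:
            for count in range(1, accelerator_count + 1):
                accelerators_qtys.add((accelerator_name, count))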

+
+                for pod in pods:
+                    # Get all the pods running on the node
+                    if (pod.spec.node_name == node.metadata.name and
+                            pod.status.phase in ['Running', 'Pending']):
+                        # Iterate over all the containers in the pod and sum
+                        # the GPU requests
+                        for container in pod.spec.containers:
+                            if container.resources.requests:
+                                allocated_qty += (
+                                    kubernetes_utils.get_node_accelerator_count(
+                                        container.resources.requests))
+
+                accelerators_available = accelerator_count - allocated_qty
+
+                if accelerator_count >= min_quantity_filter:
+                    quantized_count = (
+                        min_quantity_filter *
+                        (accelerator_count // min_quantity_filter))
+                    if accelerator_name not in total_accelerators_capacity:
+                        total_accelerators_capacity[
+                            accelerator_name] = quantized_count
+                    else:
+                        total_accelerators_capacity[
+                            accelerator_name] += quantized_count
+
+                if accelerator_name not in total_accelerators_available:
+                    total_accelerators_available[accelerator_name] = 0
+                if accelerators_available >= min_quantity_filter:
+                    quantized_availability = min_quantity_filter * (
+                        accelerators_available // min_quantity_filter)
+                    total_accelerators_available[
+                        accelerator_name] += quantized_availability
 
     result = []
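
The helpers kubernetes_utils.get_node_accelerator_count and kubernetes_utils.is_multi_host_tpu used above are added elsewhere in this PR and are not part of this hunk. As a rough sketch of the intent of the first one, assuming GKE advertises NVIDIA GPUs under nvidia.com/gpu and TPU chips under google.com/tpu (the PR's actual implementation may differ):

    from typing import Dict, Union

    # Standard Kubernetes resource names for NVIDIA GPUs and GKE TPU chips.
    GPU_RESOURCE_KEY = 'nvidia.com/gpu'
    TPU_RESOURCE_KEY = 'google.com/tpu'


    def get_node_accelerator_count(resources: Dict[str, Union[str, int]]) -> int:
        """Returns the number of GPUs or TPU chips in a resource mapping.

        Works for both node.status.allocatable and a container's
        resources.requests, which is how the diff above uses it.
        """
        if GPU_RESOURCE_KEY in resources:
            return int(resources[GPU_RESOURCE_KEY])
        if TPU_RESOURCE_KEY in resources:
            return int(resources[TPU_RESOURCE_KEY])
        return 0

is_multi_host_tpu presumably inspects the GKE TPU topology labels on the node; since its logic is not shown in this hunk, it is left out of the sketch.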

Review comment: Should we say something like "detected xxx node(s) using multi-host TPUs; skip showing them"?

Reply: @cblmemo I'm not convinced this is needed on top of the minimal note I already added, for the following reasons:

Review comment: My main concern here is that the message "Multi-host TPUs are not supported." does not convey that we excluded some nodes from the cluster, which might confuse users. One way is to count the number of nodes (or the number of TPUs) and show something like "xxx nodes with a multi-host TPU setup are excluded from the resources."

Reply: @cblmemo I see. That makes sense, and I agree with the concern. I extended the message so that users are notified that the multi-host TPU nodes in their GKE cluster are excluded from the display.
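
A minimal sketch of the kind of summary message being discussed, reusing names from the diff above (the exact wording and logging call in the final commit may differ):

    # Sketch: count the skipped nodes while iterating and surface a single
    # summary message instead of silently dropping them.
    multi_host_tpu_nodes = 0
    for node in nodes:
        if kubernetes_utils.is_multi_host_tpu(node.metadata.labels):
            multi_host_tpu_nodes += 1
            continue
        ...  # per-node capacity/availability accounting from the diff above

    if multi_host_tpu_nodes > 0:
        logger.info(f'{multi_host_tpu_nodes} node(s) with a multi-host TPU '
                    'setup were excluded from the listing; multi-host TPUs '
                    'are not yet supported.')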