[k8s] On-demand single-host TPU support on GKE #3947

Open: wants to merge 70 commits into base: master. (Changes shown below are from 56 of the 70 commits.)

Commits (70, all by landscapepainter):
a929474  initial version of TPU support on GKE (Sep 16, 2024)
80e1877  revert unnecesary change (Sep 16, 2024)
70a07ab  revert (Sep 16, 2024)
0cba9a5  use TPU_LABEL_KEY constant (Sep 17, 2024)
17bcbd8  nit (Sep 17, 2024)
9233bf5  nit (Sep 17, 2024)
12e62c0  update detect_gpu_label_formatter() to use match_label_key() (Sep 17, 2024)
c795fe7  tidy get_gpu_label_key_value (Sep 17, 2024)
1c895f0  nit (Sep 17, 2024)
a8f5b6b  update method name (Sep 17, 2024)
bdb3469  update get_gke_accelerator_name to support TPU (Sep 17, 2024)
1d2d243  add support for get_label_keys method due to TPU label key (Sep 17, 2024)
92f4f38  syntax (Sep 17, 2024)
2662ec8  update get_tpu_topology_label_key_value (Sep 17, 2024)
58f8ad6  nit (Sep 20, 2024)
1cf82b6  refactor error surfacing methods to have it work with TPU support (Sep 20, 2024)
7b551c9  update toleration comment (Sep 21, 2024)
81a05ee  support listing available TPUs and show-gpus for TPUs (Sep 21, 2024)
e8764f1  nit (Sep 21, 2024)
3497aee  update help message (Sep 21, 2024)
724806a  Update /tmp/tpu_logs dir's write permission (Sep 22, 2024)
e8d73fe  nit (Sep 22, 2024)
7ac5036  nit (Sep 22, 2024)
4470dbe  comment update on TPU resource lackage error handling (Sep 22, 2024)
0860e45  Update to use global constant instead of hard coded string of nvidia.… (Sep 22, 2024)
35f3c80  add smoke test and make exec work on TPU pods (Sep 23, 2024)
2b56a9e  update smoke test to check if TPU is reachable. (Sep 24, 2024)
305705c  add comment (Sep 24, 2024)
c2b5bfc  nit (Sep 24, 2024)
2ba5537  Comment on number of requested TPU chips for multi- and single- host … (Sep 24, 2024)
92cd77d  update method to check GKE supported TPU name (Sep 24, 2024)
d085a5b  nit (Sep 24, 2024)
7860679  move is_tpu_pod_slice to kubernetes_utils (Sep 25, 2024)
96924a7  update get_accelerator_from_label_value to use is_tpu_pod_slice method (Sep 25, 2024)
1bbac21  nit (Sep 25, 2024)
4f7ea03  format (Sep 25, 2024)
16b6c29  nit (Sep 25, 2024)
ad5089f  Merge branch 'master' of https://github.com/landscapepainter/skypilot (Sep 26, 2024)
aa8efc3  Merge branch 'master' into k8s-tpu-support-on-gke (Sep 26, 2024)
e390843  check acc count support (Oct 18, 2024)
884f0a2  preemptive TPU check (Oct 18, 2024)
ee28466  Merge branch 'master' into k8s-tpu-support-on-gke (Oct 19, 2024)
11142e5  update check_tpu_fits (Oct 19, 2024)
de55663  error msg update (Oct 19, 2024)
a500555  merge get_tpu_topology_label_key_value into get_gpu_label_key_value (Oct 19, 2024)
bce8731  Update sky/provision/kubernetes/utils.py (Oct 19, 2024)
0e8366c  nit fixes (Oct 20, 2024)
f67ad0f  format (Oct 20, 2024)
05c37aa  nit (Oct 20, 2024)
06d3879  Implement method for reading acc counts from node/pod object (Oct 20, 2024)
9a2046c  assertion update for is_tpu_vm (Oct 20, 2024)
62b235f  Exclude multi-host TPUs to displayed from show-gpus (Oct 21, 2024)
4db1e63  Notify users that multi-host TPUs are not supported from 'sky show-gpus' (Oct 21, 2024)
5923f10  format (Oct 21, 2024)
fa2e670  nit (Oct 21, 2024)
c1ee117  display warning message from show-gpus conditionally (Oct 21, 2024)
cbce4d5  update sky show-gpus (Oct 23, 2024)
241efc0  update get_accelerator_label_key_value (Oct 25, 2024)
61b01d1  Merge branch 'master' into k8s-tpu-support-on-gke (Oct 25, 2024)
2fbb4eb  format (Oct 25, 2024)
5dc92f3  Merge branch 'master' into k8s-tpu-support-on-gke (Oct 26, 2024)
9e8d53d  format (Oct 26, 2024)
932e073  Merge branch 'master' into k8s-tpu-support-on-gke (Nov 1, 2024)
0a0eac2  format (Nov 1, 2024)
3bc95b9  Merge branch 'k8s-tpu-support-on-gke' of https://github.com/landscape… (Nov 1, 2024)
9dbaa72  update comment (Nov 1, 2024)
f5e1d37  resolve review comments (Nov 1, 2024)
688c0b4  update tpuvm_mnist.yaml (Nov 2, 2024)
2dec7f9  resolve comments (Nov 3, 2024)
dc23e88  update display message for show-gpus (Nov 4, 2024)
19 changes: 14 additions & 5 deletions sky/cli.py
@@ -3106,7 +3106,8 @@ def _get_kubernetes_realtime_gpu_table(
                    'in Kubernetes cluster. ')
        debug_msg = ('To show available accelerators on kubernetes,'
                     ' run: sky show-gpus --cloud kubernetes ')
-        full_err_msg = (err_msg + kubernetes_utils.NO_GPU_HELP_MESSAGE +
-                        debug_msg)
+        full_err_msg = (err_msg +
+                        kubernetes_utils.NO_ACCELERATOR_HELP_MESSAGE +
+                        debug_msg)
        raise ValueError(full_err_msg)
    for gpu, _ in sorted(counts.items()):
@@ -3123,9 +3124,9 @@ def _get_kubernetes_node_info_table(context: Optional[str]):
    node_info_dict = kubernetes_utils.get_kubernetes_node_info(context)
    for node_name, node_info in node_info_dict.items():
        node_table.add_row([
-            node_name, node_info.gpu_type,
-            node_info.total['nvidia.com/gpu'],
-            node_info.free['nvidia.com/gpu']
+            node_name, node_info.accelerator_type,
+            node_info.total['accelerator_count'],
+            node_info.free['accelerators_available']
        ])
    return node_table

@@ -3179,8 +3180,16 @@ def _output():
            yield from k8s_realtime_table.get_string()
            k8s_node_table = _get_kubernetes_node_info_table(context)
            yield '\n\n'
+            # TODO(Doyoung): Update the message with the multi-host TPU
+            # support.
+            k8s_per_node_acc_message = (
+                'Kubernetes per node accelerator availability ')
+            if kubernetes_utils.multi_host_tpu_exists_in_cluster(
+                    context):
+                k8s_per_node_acc_message += (
+                    '(Note: Multi-host TPUs are not supported.)')
cblmemo (Collaborator) commented:

Suggested change:
-            # TODO(Doyoung): Update the message with the multi-host TPU
-            # support.
-            k8s_per_node_acc_message = (
-                'Kubernetes per node accelerator availability ')
-            if kubernetes_utils.multi_host_tpu_exists_in_cluster(
-                    context):
-                k8s_per_node_acc_message += (
-                    '(Note: Multi-host TPUs are not supported.)')
+            # TODO(Doyoung): Update the message with the multi-host TPU
+            # support.
+            maybe_tpu_multi_host_hint = ''
+            if kubernetes_utils.multi_host_tpu_exists_in_cluster(
+                    context):
+                maybe_tpu_multi_host_hint = f'Detected {xxx} node...'

Should we say something like "detected xxx nodes that are using multi-host TPUs; skipped showing them"?
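(For concreteness, the proposed counting could look roughly like the sketch below. This is illustrative, not code from the PR: build_multi_host_tpu_hint is a hypothetical helper, while is_multi_host_tpu is the utility this PR adds elsewhere in the diff.)

def build_multi_host_tpu_hint(nodes) -> str:
    # Count the multi-host TPU nodes that show-gpus skips, so the hint can
    # say how many nodes were excluded instead of a bare "not supported".
    num_skipped = sum(
        1 for node in nodes
        if kubernetes_utils.is_multi_host_tpu(node.metadata.labels))
    if num_skipped == 0:
        return ''
    return (f'Detected {num_skipped} node(s) with a multi-host TPU setup; '
            'they are excluded from the display as multi-host TPUs are '
            'not supported.')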

landscapepainter (Collaborator, Author) replied on Nov 1, 2024:

@cblmemo I'm not convinced this is needed on top of the minimal note I already added, for the following reasons:

  1. There can be multiple multi-host TPUs in a user's GKE cluster. If there are, say, 10 of them, your suggestion would list out all of them. I'm not sure that is the best UX, as we are trying to keep things concise. If it's important info, we should add it, but..
  2. I also wonder whether this is necessary to begin with, since users of TPUs on GKE would know what a multi-host TPU is and whether one exists in their cluster.

cblmemo (Collaborator) replied:

My main concern here is that the message "Multi-host TPUs are not supported." does not convey that we excluded some nodes from the cluster listing, which might confuse users.

One way is to count the number of nodes (or the number of TPUs) and show something like "xxx nodes with a multi-host TPU setup are excluded from the resources".

landscapepainter (Collaborator, Author) replied:

@cblmemo I see. That makes sense, and I agree with the concern. I extended the message so that users are notified that the multi-host TPU nodes in their GKE cluster are excluded from the display.

Kubernetes per node accelerator availability (Note: Multi-host TPUs are detected and excluded from the display as multi-host TPUs are not supported.)
NODE_NAME                                  GPU_NAME              TOTAL_GPUS  FREE_GPUS
gke-mix-tpu-dy-default-pool-ad5bdc4d-9lw4  None                  0           0
gke-mix-tpu-dy-default-pool-ad5bdc4d-bs86  None                  0           0
gke-mix-tpu-dy-default-pool-ad5bdc4d-cfxn  None                  0           0
gke-mix-tpu-dy-default-pool-ad5bdc4d-nr5x  None                  0           0
gke-mix-tpu-dy-default-pool-ad5bdc4d-qgjt  None                  0           0
gke-mix-tpu-dy-default-pool-ad5bdc4d-rl37  None                  0           0
gke-mix-tpu-dy-default-pool-ad5bdc4d-v4ts  None                  0           0
gke-mix-tpu-dy-default-pool-ad5bdc4d-zp2x  None                  0           0
gke-tpu-a3716138-984x                      tpu-v5-lite-podslice  4           0
gke-tpu-c5117ac4-qfzt                      tpu-v5-lite-podslice  1           1

            yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
-                   f'Kubernetes per node GPU availability'
+                   f'{k8s_per_node_acc_message}'
                    f'{colorama.Style.RESET_ALL}\n')
            yield from k8s_node_table.get_string()
            if kubernetes_autoscaling:
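The hunk above calls kubernetes_utils.multi_host_tpu_exists_in_cluster(), whose body is outside the lines shown. A plausible sketch of the underlying per-node check, assuming GKE's cloud.google.com/gke-tpu-topology node label (values like '2x4' or '2x2x4') and the google.com/tpu allocatable resource; the PR's actual helper may differ:

import math

def _node_is_multi_host_tpu(node) -> bool:
    # A TPU slice spans multiple hosts when the chip count implied by the
    # topology label exceeds the TPU chips allocatable on this single node.
    topology = node.metadata.labels.get('cloud.google.com/gke-tpu-topology')
    if topology is None:
        return False
    chips_in_slice = math.prod(int(dim) for dim in topology.split('x'))
    chips_on_node = int(node.status.allocatable.get('google.com/tpu', 0))
    return chips_on_node > 0 and chips_in_slice > chips_on_node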
17 changes: 14 additions & 3 deletions sky/clouds/kubernetes.py
@@ -365,11 +365,19 @@ def make_deploy_resources_variables(

        k8s_acc_label_key = None
        k8s_acc_label_value = None
+        k8s_topology_label_key = None
+        k8s_topology_label_value = None
+        tpu_requested = False

-        # If GPUs are requested, set node label to match the GPU type.
+        # If GPU/TPUs are requested, set node label to match the GPU/TPU type.
        if acc_count > 0 and acc_type is not None:
-            k8s_acc_label_key, k8s_acc_label_value = \
-                kubernetes_utils.get_gpu_label_key_value(context, acc_type)
+            (k8s_acc_label_key, k8s_acc_label_value, k8s_topology_label_key,
+             k8s_topology_label_value) = (
+                 kubernetes_utils.get_accelerator_label_key_value(
+                     context, acc_type, acc_count))
+            if (k8s_acc_label_key ==
+                    kubernetes_utils.GKELabelFormatter.TPU_LABEL_KEY):
+                tpu_requested = True

        port_mode = network_utils.get_port_mode(None)

@@ -431,6 +439,9 @@
            'k8s_skypilot_system_namespace': _SKYPILOT_SYSTEM_NAMESPACE,
            'k8s_spot_label_key': spot_label_key,
            'k8s_spot_label_value': spot_label_value,
+            'tpu_requested': tpu_requested,
+            'k8s_topology_label_key': k8s_topology_label_key,
+            'k8s_topology_label_value': k8s_topology_label_value,
            'image_id': image_id,
        }

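How these variables are consumed is outside this diff (the pod template), but the intent is roughly the following sketch; build_node_selector and the dict shape are illustrative, not the PR's template code, while the variable names come from the dict above:

def build_node_selector(deploy_vars: dict) -> dict:
    # GPU and TPU pods both pin the accelerator node label; TPU pods
    # additionally pin the slice topology so the pod lands in a node pool
    # with the matching slice shape.
    node_selector = {}
    if deploy_vars.get('k8s_acc_label_key'):
        node_selector[deploy_vars['k8s_acc_label_key']] = (
            deploy_vars['k8s_acc_label_value'])
    if deploy_vars.get('tpu_requested') and deploy_vars.get(
            'k8s_topology_label_key'):
        node_selector[deploy_vars['k8s_topology_label_key']] = (
            deploy_vars['k8s_topology_label_value'])
    return node_selector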
116 changes: 62 additions & 54 deletions sky/clouds/service_catalog/kubernetes_catalog.py
@@ -84,16 +84,16 @@ def list_accelerators_realtime(
    ) or not kubernetes_utils.check_credentials(context)[0]:
        return {}, {}, {}

-    has_gpu = kubernetes_utils.detect_gpu_resource(context)
+    has_gpu = kubernetes_utils.detect_accelerator_resource(context)
    if not has_gpu:
        return {}, {}, {}

-    label_formatter, _ = kubernetes_utils.detect_gpu_label_formatter(context)
-    if not label_formatter:
+    lf, _ = kubernetes_utils.detect_gpu_label_formatter(context)
+    if not lf:
        return {}, {}, {}

    accelerators_qtys: Set[Tuple[str, int]] = set()
-    key = label_formatter.get_label_key()
+    keys = lf.get_label_keys()
    nodes = kubernetes_utils.get_kubernetes_nodes(context)
    # Get the pods to get the real-time GPU usage
    pods = kubernetes_utils.get_all_pods_in_kubernetes_cluster(context)
@@ -104,56 +104,64 @@
    min_quantity_filter = quantity_filter if quantity_filter else 1

    for node in nodes:
-        if key in node.metadata.labels:
-            allocated_qty = 0
-            accelerator_name = label_formatter.get_accelerator_from_label_value(
-                node.metadata.labels.get(key))
-
-            # Check if name_filter regex matches the accelerator_name
-            regex_flags = 0 if case_sensitive else re.IGNORECASE
-            if name_filter and not re.match(
-                    name_filter, accelerator_name, flags=regex_flags):
-                continue
-
-            accelerator_count = int(
-                node.status.allocatable.get('nvidia.com/gpu', 0))
-
-            # Generate the GPU quantities for the accelerators
-            if accelerator_name and accelerator_count > 0:
-                for count in range(1, accelerator_count + 1):
-                    accelerators_qtys.add((accelerator_name, count))
-
-            for pod in pods:
-                # Get all the pods running on the node
-                if (pod.spec.node_name == node.metadata.name and
-                        pod.status.phase in ['Running', 'Pending']):
-                    # Iterate over all the containers in the pod and sum the
-                    # GPU requests
-                    for container in pod.spec.containers:
-                        if container.resources.requests:
-                            allocated_qty += int(
-                                container.resources.requests.get(
-                                    'nvidia.com/gpu', 0))
-
-            accelerators_available = accelerator_count - allocated_qty
-
-            if accelerator_count >= min_quantity_filter:
-                quantized_count = (min_quantity_filter *
-                                   (accelerator_count // min_quantity_filter))
-                if accelerator_name not in total_accelerators_capacity:
-                    total_accelerators_capacity[
-                        accelerator_name] = quantized_count
-                else:
-                    total_accelerators_capacity[
-                        accelerator_name] += quantized_count
-
-            if accelerator_name not in total_accelerators_available:
-                total_accelerators_available[accelerator_name] = 0
-            if accelerators_available >= min_quantity_filter:
-                quantized_availability = min_quantity_filter * (
-                    accelerators_available // min_quantity_filter)
-                total_accelerators_available[
-                    accelerator_name] += quantized_availability
+        for key in keys:
+            if key in node.metadata.labels:
+                allocated_qty = 0
+                accelerator_name = lf.get_accelerator_from_label_value(
+                    node.metadata.labels.get(key))
+
+                # Exclude multi-host TPUs from being processed.
+                # TODO(Doyoung): Remove the logic when adding support for
+                # multi-host TPUs.
+                if kubernetes_utils.is_multi_host_tpu(node.metadata.labels):
+                    continue
+
+                # Check if name_filter regex matches the accelerator_name
+                regex_flags = 0 if case_sensitive else re.IGNORECASE
+                if name_filter and not re.match(
+                        name_filter, accelerator_name, flags=regex_flags):
+                    continue
+
+                # Generate the GPU quantities for the accelerators
+                accelerator_count = (
+                    kubernetes_utils.get_node_accelerator_count(
+                        node.status.allocatable))
+                if accelerator_name and accelerator_count > 0:
+                    for count in range(1, accelerator_count + 1):
+                        accelerators_qtys.add((accelerator_name, count))
A collaborator commented:

Maybe a quick way of addressing the show-gpus issue is to change this logic to show only the exact count, not the range, if the accelerator type is a TPU:

                if accelerator_name and accelerator_count > 0:
                    if accelerator is TPU:
                        accelerators_qtys.add((accelerator_name, accelerator_count))
                    else:
                        for count in range(1, accelerator_count + 1):
                            accelerators_qtys.add((accelerator_name, count))
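(A concrete rendering of that pseudocode, assuming the PR's kubernetes_utils.is_tpu_on_gke() helper, which is used elsewhere in this diff, is the intended TPU check:)

                if accelerator_name and accelerator_count > 0:
                    if kubernetes_utils.is_tpu_on_gke(accelerator_name):
                        # TPU slices are requested whole: only the exact
                        # chip count of the slice is a valid quantity.
                        accelerators_qtys.add(
                            (accelerator_name, accelerator_count))
                    else:
                        for count in range(1, accelerator_count + 1):
                            accelerators_qtys.add((accelerator_name, count))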

landscapepainter (Collaborator, Author) replied on Oct 23, 2024:
@cblmemo @romilbhardwaj fixed at cbce4d5

$ sky show-gpus --cloud kubernetes
Kubernetes GPUs (context: gke_skypilot-375900_us-south1-a_mix-tpu-dy)
GPU                   QTY_PER_NODE  TOTAL_GPUS  TOTAL_FREE_GPUS
tpu-v5-lite-podslice  1, 4          5           5

Kubernetes per node accelerator availability (Note: Multi-host TPUs are not supported.)
NODE_NAME                                  GPU_NAME              TOTAL_GPUS  FREE_GPUS
gke-mix-tpu-dy-default-pool-439ab6e7-7vk4  None                  0           0
gke-mix-tpu-dy-default-pool-439ab6e7-fjdh  None                  0           0
gke-tpu-18503f8f-v441                      tpu-v5-lite-podslice  4           4
gke-tpu-5af36f0c-q74l                      tpu-v5-lite-podslice  1           1


+                for pod in pods:
+                    # Get all the pods running on the node
+                    if (pod.spec.node_name == node.metadata.name and
+                            pod.status.phase in ['Running', 'Pending']):
+                        # Iterate over all the containers in the pod and sum
+                        # the GPU requests
+                        for container in pod.spec.containers:
+                            if container.resources.requests:
+                                allocated_qty += (
+                                    kubernetes_utils.get_node_accelerator_count(
+                                        container.resources.requests))
+
+                accelerators_available = accelerator_count - allocated_qty
+
+                if accelerator_count >= min_quantity_filter:
+                    quantized_count = (
+                        min_quantity_filter *
+                        (accelerator_count // min_quantity_filter))
+                    if accelerator_name not in total_accelerators_capacity:
+                        total_accelerators_capacity[
+                            accelerator_name] = quantized_count
+                    else:
+                        total_accelerators_capacity[
+                            accelerator_name] += quantized_count
+
+                if accelerator_name not in total_accelerators_available:
+                    total_accelerators_available[accelerator_name] = 0
+                if accelerators_available >= min_quantity_filter:
+                    quantized_availability = min_quantity_filter * (
+                        accelerators_available // min_quantity_filter)
+                    total_accelerators_available[
+                        accelerator_name] += quantized_availability

result = []

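A worked example of the quantization arithmetic used above (values are illustrative): with quantity_filter=4, i.e. min_quantity_filter=4, a node with 7 free chips contributes one full group of 4, while a node with 3 free chips contributes nothing:

min_quantity_filter = 4
for accelerators_available in (7, 3):
    quantized = min_quantity_filter * (
        accelerators_available // min_quantity_filter)
    print(accelerators_available, '->', quantized)  # 7 -> 4, then 3 -> 0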
6 changes: 5 additions & 1 deletion sky/clouds/utils/gcp_utils.py
@@ -17,6 +17,7 @@
from sky import sky_logging
from sky import skypilot_config
from sky.provision.gcp import constants
+from sky.provision.kubernetes import utils as kubernetes_utils
from sky.utils import subprocess_utils

if typing.TYPE_CHECKING:
@@ -35,7 +36,10 @@ def is_tpu(resources: Optional['resources_lib.Resources']) -> bool:
def is_tpu_vm(resources: Optional['resources_lib.Resources']) -> bool:
    if not is_tpu(resources):
        return False
-    assert resources is not None
+    assert (resources is not None and len(resources.accelerators) == 1)
+    acc, _ = list(resources.accelerators.items())[0]
+    if kubernetes_utils.is_tpu_on_gke(acc):
+        return False
    if resources.accelerator_args is None:
        return True
    return resources.accelerator_args.get('tpu_vm', True)
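Illustrative behavior of the updated is_tpu_vm() (the accelerator names and Resources construction here are assumptions, not from this diff): a GKE TPU slice accelerator is routed to the Kubernetes code path and is not a Cloud TPU VM, while a GCP TPU accelerator still defaults to the TPU VM architecture:

from sky import resources as resources_lib

# Assumed names: 'tpu-v5-lite-podslice' is a GKE TPU slice; 'tpu-v4-8' is a
# GCP Cloud TPU.
gke_tpu = resources_lib.Resources(accelerators={'tpu-v5-lite-podslice': 4})
cloud_tpu = resources_lib.Resources(accelerators={'tpu-v4-8': 1})
assert not is_tpu_vm(gke_tpu)   # handled on GKE via Kubernetes, not a TPU VM
assert is_tpu_vm(cloud_tpu)     # accelerator_args defaults to tpu_vm=True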