Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OCI] Re-Implementation with the new provision API framework. #4119

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4003,12 +4003,11 @@ def teardown_no_lock(self,
region = config['provider']['region']

# pylint: disable=import-outside-toplevel
from ray.autoscaler.tags import TAG_RAY_CLUSTER_NAME

from sky.skylet.providers.oci.query_helper import oci_query_helper
from sky.provision.constants import TAG_RAY_CLUSTER_NAME
from sky.provision.oci.query_utils import query_helper

# 0: All terminated successfully, failed count otherwise
returncode = oci_query_helper.terminate_instances_by_tags(
returncode = query_helper.terminate_instances_by_tags(
{TAG_RAY_CLUSTER_NAME: cluster_name_on_cloud}, region)
cblmemo marked this conversation as resolved.
Show resolved Hide resolved

# To avoid undefined local variables error.
Expand Down
25 changes: 8 additions & 17 deletions sky/clouds/oci.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from sky.adaptors import oci as oci_adaptor
from sky.clouds import service_catalog
from sky.clouds.utils import oci_utils
from sky.provision.oci.query_utils import query_helper
from sky.utils import common_utils
from sky.utils import resources_utils
from sky.utils import ux_utils
Expand Down Expand Up @@ -61,6 +62,9 @@ class OCI(clouds.Cloud):
{resources_utils.DiskTier.ULTRA})
_BEST_DISK_TIER = resources_utils.DiskTier.HIGH

PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
STATUS_VERSION = clouds.StatusVersion.SKYPILOT

@classmethod
def _unsupported_features_for_resources(
cls, resources: 'resources_lib.Resources'
Expand Down Expand Up @@ -436,7 +440,7 @@ def check_credentials(cls) -> Tuple[bool, Optional[str]]:
return True, None
except (oci_adaptor.oci.exceptions.ConfigFileNotFound,
oci_adaptor.oci.exceptions.InvalidConfig,
oci_adaptor.service_exception()) as e:
oci_adaptor.oci.exceptions.ServiceError) as e:
return False, (
f'OCI credential is not correctly set. '
f'Check the credential file at {conf_file}\n'
Expand Down Expand Up @@ -596,32 +600,19 @@ def query_status(cls, name: str, tag_filters: Dict[str, str],
region: Optional[str], zone: Optional[str],
**kwargs) -> List[status_lib.ClusterStatus]:
del zone, kwargs # Unused.
# Check the lifecycleState definition from the page
# https://docs.oracle.com/en-us/iaas/api/#/en/iaas/latest/Instance/
status_map = {
'PROVISIONING': status_lib.ClusterStatus.INIT,
'STARTING': status_lib.ClusterStatus.INIT,
'RUNNING': status_lib.ClusterStatus.UP,
'STOPPING': status_lib.ClusterStatus.STOPPED,
'STOPPED': status_lib.ClusterStatus.STOPPED,
'TERMINATED': None,
'TERMINATING': None,
}

# pylint: disable=import-outside-toplevel
from sky.skylet.providers.oci.query_helper import oci_query_helper

status_list = []
try:
vms = oci_query_helper.query_instances_by_tags(
tag_filters=tag_filters, region=region)
vms = query_helper.query_instances_by_tags(tag_filters=tag_filters,
region=region)
except Exception as e: # pylint: disable=broad-except
with ux_utils.print_exception_no_traceback():
raise exceptions.ClusterStatusFetchingError(
f'Failed to query OCI cluster {name!r} status. '
'Details: '
f'{common_utils.format_exception(e, use_bracket=True)}')

status_map = oci_utils.oci_config.STATE_MAPPING_OCI_TO_SKY
for node in vms:
vm_status = node.lifecycle_state
if vm_status in status_map:
cblmemo marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
2 changes: 1 addition & 1 deletion sky/clouds/service_catalog/oci_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _get_df() -> 'pd.DataFrame':
logger.debug(f'It is OK goes here when testing: {str(e)}')
subscribed_regions = []

except oci_adaptor.service_exception() as e:
except oci_adaptor.oci.exceptions.ServiceError as e:
# We never expect to reach this branch. However, we still catch
# the error so that if any OCI call fails, the program can still
# proceed by trial and error.
Expand Down
15 changes: 13 additions & 2 deletions sky/clouds/utils/oci_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
- Hysun He (hysun.he@oracle.com) @ Oct, 2024: Add default image OS
configuration.
"""
import logging
import os

from sky import sky_logging
from sky import skypilot_config
from sky import status_lib
from sky.utils import resources_utils

logger = logging.getLogger(__name__)
logger = sky_logging.init_logger(__name__)


class OCIConfig:
Expand Down Expand Up @@ -77,6 +78,16 @@ class OCIConfig:
resources_utils.DiskTier.HIGH: DISK_TIER_HIGH,
}

STATE_MAPPING_OCI_TO_SKY = {
cblmemo marked this conversation as resolved.
Show resolved Hide resolved
'PROVISIONING': status_lib.ClusterStatus.INIT,
'STARTING': status_lib.ClusterStatus.INIT,
'RUNNING': status_lib.ClusterStatus.UP,
'STOPPING': status_lib.ClusterStatus.STOPPED,
'STOPPED': status_lib.ClusterStatus.STOPPED,
'TERMINATED': None,
'TERMINATING': None,
}

@classmethod
def get_compartment(cls, region):
# Allow task(cluster)-specific compartment/VCN parameters.
Expand Down
1 change: 1 addition & 0 deletions sky/provision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from sky.provision import gcp
from sky.provision import kubernetes
from sky.provision import lambda_cloud
from sky.provision import oci
from sky.provision import runpod
from sky.provision import vsphere
from sky.utils import command_runner
Expand Down
15 changes: 15 additions & 0 deletions sky/provision/oci/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""OCI provisioner for SkyPilot.

History:
- Hysun He (hysun.he@oracle.com) @ Oct.16, 2024: Initial implementation
"""

from sky.provision.oci.config import bootstrap_instances
from sky.provision.oci.instance import cleanup_ports
from sky.provision.oci.instance import get_cluster_info
from sky.provision.oci.instance import open_ports
from sky.provision.oci.instance import query_instances
from sky.provision.oci.instance import run_instances
from sky.provision.oci.instance import stop_instances
from sky.provision.oci.instance import terminate_instances
from sky.provision.oci.instance import wait_instances
47 changes: 47 additions & 0 deletions sky/provision/oci/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""OCI configuration bootstrapping.

Finds the compartment, finds or creates the VCN/subnet, and subscribes to the
image when it comes from a Marketplace listing, for a cluster to be launched.

History:
- Hysun He (hysun.he@oracle.com) @ Oct.16, 2024: Initial implementation
"""

from sky import sky_logging
from sky.adaptors import oci as oci_adaptor
from sky.clouds.utils import oci_utils
from sky.provision import common
from sky.provision.oci.query_utils import query_helper

logger = sky_logging.init_logger(__name__)


@common.log_function_start_end
def bootstrap_instances(
        region: str, cluster_name_on_cloud: str,
        config: common.ProvisionConfig) -> common.ProvisionConfig:
    """Prepare the OCI-side prerequisites for launching a cluster.

    See sky/provision/__init__.py for the provisioner interface.

    The steps, in order:
      1. Obtain the OCI core client for ``region`` using the configured
         profile.
      2. Look up the compartment in which instances will be created.
      3. Look up the configured VCN/subnet, or create a new one.
      4. Subscribe to the image identified by the node config's
         ``AppCatalogListingId`` / ``ResourceVersion`` entries.

    Args:
        region: OCI region the cluster is launched in.
        cluster_name_on_cloud: Cluster name as used on the cloud side; only
            logged here.
        config: Provision configuration; its ``node_config`` supplies the
            image listing id and resource version.

    Returns:
        The same ``config`` object that was passed in.

    Raises:
        RuntimeError: If no compartment or no VCN could be found/created.
            (Raised explicitly rather than via ``assert`` so the check is
            not stripped when Python runs with ``-O``.)
    """
    # OCI module import and oci client. The returned client is not used
    # here; NOTE(review): presumably this call is made so that client
    # creation problems surface early -- confirm against the adaptor.
    oci_adaptor.get_core_client(region, oci_utils.oci_config.get_profile())

    # Find / create a compartment for creating instances.
    compartment = query_helper.find_compartment(region)
    if compartment is None:
        raise RuntimeError(
            f'Failed to find an OCI compartment in region {region!r}.')

    # Find the configured VCN, or create a new one.
    vcn = query_helper.find_create_vcn_subnet(region)
    if vcn is None:
        raise RuntimeError(
            f'Failed to find or create an OCI VCN in region {region!r}.')

    node_config = config.node_config

    # Subscribe the image if it is from Marketplace listing.
    query_helper.subscribe_image(
        compartment_id=compartment,
        listing_id=node_config['AppCatalogListingId'],
        resource_version=node_config['ResourceVersion'],
        region=region,
    )

    logger.info(f'Using cluster name: {cluster_name_on_cloud}')

    return config
Loading
Loading