From efbdc4f532424b609bb1b23ecbdea5f7854a83e1 Mon Sep 17 00:00:00 2001 From: jyejare Date: Thu, 1 Aug 2024 21:03:38 +0530 Subject: [PATCH] Develop: Rebase for OCP resource cleanup on AWS --- .pre-commit-config.yaml | 2 +- cloudwash/cli.py | 8 ++- cloudwash/constants.py | 2 +- cloudwash/entities/providers.py | 9 +++ cloudwash/entities/resources/base.py | 18 ++++++ cloudwash/entities/resources/ocps.py | 60 +++++++++++++++++++ cloudwash/providers/aws.py | 65 ++++++++++++-------- cloudwash/utils.py | 90 ++++++++++++++++++++++++++++ settings.yaml.template | 5 ++ 9 files changed, 232 insertions(+), 27 deletions(-) create mode 100644 cloudwash/entities/resources/ocps.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e2a0bf29a..440ee222e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,6 +15,6 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 + rev: 7.1.0 hooks: - id: flake8 diff --git a/cloudwash/cli.py b/cloudwash/cli.py index bbcff222f..176af7c11 100644 --- a/cloudwash/cli.py +++ b/cloudwash/cli.py @@ -98,8 +98,13 @@ def azure(ctx, vms, discs, nics, images, pips, _all, _all_rg): @click.option("--images", is_flag=True, help="Remove only images from the provider") @click.option("--pips", is_flag=True, help="Remove only Public IPs from the provider") @click.option("--stacks", is_flag=True, help="Remove only CloudFormations from the provider") +@click.option( + "--ocps", + is_flag=True, + help="Remove only unused OCP Cluster occupied resources from the provider", +) @click.pass_context -def aws(ctx, vms, discs, nics, images, pips, stacks, _all): +def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, _all): # Validate Amazon Settings validate_provider(ctx.command.name) is_dry_run = ctx.parent.params["dry"] @@ -110,6 +115,7 @@ def aws(ctx, vms, discs, nics, images, pips, stacks, _all): images=images, pips=pips, stacks=stacks, + ocps=ocps, _all=_all, dry_run=is_dry_run, ) diff --git a/cloudwash/constants.py b/cloudwash/constants.py index d60b570bf..c7fe97d25 100644 --- a/cloudwash/constants.py +++ b/cloudwash/constants.py @@ -1,2 +1,2 @@ -aws_data = ['VMS', 'NICS', 'DISCS', 'PIPS', 'RESOURCES', 'STACKS'] +aws_data = ['VMS', 'NICS', 'DISCS', 'PIPS', 'RESOURCES', 'STACKS', 'OCPS'] azure_data = ['VMS', 'NICS', 'DISCS', 'IMAGES', 'PIPS', 'RESOURCES'] diff --git a/cloudwash/entities/providers.py b/cloudwash/entities/providers.py index cf6d0136e..e5505913c 100644 --- a/cloudwash/entities/providers.py +++ b/cloudwash/entities/providers.py @@ -4,6 +4,7 @@ from cloudwash.entities.resources.images import CleanAzureImages from cloudwash.entities.resources.nics import CleanAWSNics from cloudwash.entities.resources.nics import CleanAzureNics +from cloudwash.entities.resources.ocps import CleanAWSOcps from cloudwash.entities.resources.pips import CleanAWSPips from cloudwash.entities.resources.pips import CleanAzurePips from cloudwash.entities.resources.stacks import CleanAWSStacks @@ -15,6 +16,14 @@ class providerCleanup: def __init__(self, client): self.client = client + @property + def ocps(self): + providerclass = self.__class__.__name__ + if 'AWS' in providerclass: + return CleanAWSOcps(client=self.client) + else: + raise NotImplementedError(f'The OCPs cleanup on {providerclass} is not implemented') + @property def vms(self): providerclass = self.__class__.__name__ diff --git a/cloudwash/entities/resources/base.py b/cloudwash/entities/resources/base.py index 5aff98440..b70ff1b43 100644 --- a/cloudwash/entities/resources/base.py +++ b/cloudwash/entities/resources/base.py @@ -16,6 +16,24 @@ def _set_dry(self): pass +class OCPsCleanup(ResourceCleanup): + @abstractmethod + def list(self): + pass + + @abstractmethod + def cleanup(self): + pass + + @abstractmethod + def remove(self): + pass + + @abstractmethod + def _set_dry(self): + pass + + class DiscsCleanup(ResourceCleanup): @abstractmethod def list(self): diff --git a/cloudwash/entities/resources/ocps.py b/cloudwash/entities/resources/ocps.py new file mode 100644 index 000000000..38bd284b7 --- /dev/null +++ b/cloudwash/entities/resources/ocps.py @@ -0,0 +1,60 @@ +from cloudwash.config import settings +from cloudwash.entities.resources.base import OCPsCleanup +from cloudwash.utils import calculate_time_threshold +from cloudwash.utils import dry_data +from cloudwash.utils import filter_resources_by_time_modified +from cloudwash.utils import group_ocps_by_cluster +from cloudwash.utils import OCP_TAG_SUBSTR + + +class CleanOCPs(OCPsCleanup): + def __init__(self, client): + self.client = client + self._delete = [] + self.list() + + def _set_dry(self): + # VMsContainer = namedtuple('VMsCotainer', ['delete', 'stop', 'skip']) + # return VMsContainer(self._delete, self._stop, self._skip) + dry_data['OCPS']['delete'] = self._delete + + def list(self): + pass + + def remove(self): + pass + + def cleanup(self): + if not settings.dry_run: + self.remove() + + +class CleanAWSOcps(CleanOCPs): + def list(self): + time_threshold = calculate_time_threshold(time_ref=settings.aws.criteria.ocps.sla) + + query = " ".join([f"tag.key:{OCP_TAG_SUBSTR}*", f"region:{self.client.cleaning_region}"]) + resources = self.client.list_resources(query=query) + + # Prepare resources to be filtered before deletion + cluster_map = group_ocps_by_cluster(resources=resources) + for cluster_name in cluster_map.keys(): + cluster_resources = cluster_map[cluster_name].get("Resources") + instances = cluster_map[cluster_name].get("Instances") + + if instances: + # For resources with associated EC2 Instances, filter by Instances SLA + if not filter_resources_by_time_modified( + time_threshold, + resources=instances, + ): + self._delete.extend(cluster_resources) + else: + # For resources with no associated EC2 Instances, identify as leftovers + self._delete.extend( + filter_resources_by_time_modified(time_threshold, resources=cluster_resources) + ) + + # Sort resources by type + self._delete = sorted(self._delete, key=lambda x: x.resource_type) + self._set_dry() diff --git a/cloudwash/providers/aws.py b/cloudwash/providers/aws.py index dbb9c3e45..7215452e5 100644 --- a/cloudwash/providers/aws.py +++ b/cloudwash/providers/aws.py @@ -11,27 +11,44 @@ def cleanup(**kwargs): is_dry_run = kwargs.get("dry_run", False) regions = settings.aws.auth.regions - if "all" in regions: - with compute_client("aws", aws_region="us-west-2") as client: - regions = client.list_regions() - for region in regions: - for items in data: - dry_data[items]['delete'] = [] - with compute_client("aws", aws_region=region) as aws_client: - awscleanup = AWSCleanup(client=aws_client) - # Actual Cleaning and dry execution - logger.info(f"\nResources from the region: {region}") - if kwargs["vms"] or kwargs["_all"]: - awscleanup.vms.cleanup() - if kwargs["nics"] or kwargs["_all"]: - awscleanup.nics.cleanup() - if kwargs["discs"] or kwargs["_all"]: - awscleanup.discs.cleanup() - if kwargs["pips"] or kwargs["_all"]: - awscleanup.pips.cleanup() - if kwargs["images"] or kwargs["_all"]: - awscleanup.images.cleanup() - if kwargs["stacks"] or kwargs["_all"]: - awscleanup.stacks.cleanup() - if is_dry_run: - echo_dry(dry_data) + if kwargs["ocps"]: + aws_client_region = settings.aws.criteria.ocps.ocp_client_region + with compute_client("aws", aws_region=aws_client_region) as aws_ocp_client: + if "all" in regions: + regions = aws_ocp_client.list_regions() + awscleanup = AWSCleanup(client=aws_ocp_client) + for region in regions: + aws_ocp_client.cleaning_region = region + # Emptying the dry data for previous region everytime + for items in data: + dry_data[items]['delete'] = [] + logger.info(f"\nResources from the region: {region}") + awscleanup.ocps.cleanup() + if is_dry_run: + echo_dry(dry_data) + else: + if "all" in regions: + with compute_client("aws", aws_region="us-west-2") as client: + regions = client.list_regions() + for region in regions: + # Emptying the dry data for previous region everytime + for items in data: + dry_data[items]['delete'] = [] + with compute_client("aws", aws_region=region) as aws_client: + awscleanup = AWSCleanup(client=aws_client) + # Actual Cleaning and dry execution + logger.info(f"\nResources from the region: {region}") + if kwargs["vms"] or kwargs["_all"]: + awscleanup.vms.cleanup() + if kwargs["nics"] or kwargs["_all"]: + awscleanup.nics.cleanup() + if kwargs["discs"] or kwargs["_all"]: + awscleanup.discs.cleanup() + if kwargs["pips"] or kwargs["_all"]: + awscleanup.pips.cleanup() + if kwargs["images"] or kwargs["_all"]: + awscleanup.images.cleanup() + if kwargs["stacks"] or kwargs["_all"]: + awscleanup.stacks.cleanup() + if is_dry_run: + echo_dry(dry_data) diff --git a/cloudwash/utils.py b/cloudwash/utils.py index e450b9919..df13f936a 100644 --- a/cloudwash/utils.py +++ b/cloudwash/utils.py @@ -2,19 +2,25 @@ from collections import namedtuple from datetime import datetime +import dateparser import pytz +from wrapanapi.systems.ec2 import ResourceExplorerResource from cloudwash.logger import logger +OCP_TAG_SUBSTR = "kubernetes.io/cluster/" + _vms_dict = {"VMS": {"delete": [], "stop": [], "skip": []}} dry_data = { "NICS": {"delete": []}, "DISCS": {"delete": []}, "PIPS": {"delete": []}, + "OCPS": {"delete": []}, "RESOURCES": {"delete": []}, "STACKS": {"delete": []}, "IMAGES": {"delete": []}, } + dry_data.update(_vms_dict) @@ -32,6 +38,12 @@ def echo_dry(dry_data=None) -> None: deletable_nics = dry_data["NICS"]["delete"] deletable_images = dry_data["IMAGES"]["delete"] deletable_pips = dry_data["PIPS"]["delete"] if "PIPS" in dry_data else None + deletable_ocps = { + ocp.resource_type: [ + r.name for r in dry_data["OCPS"]["delete"] if r.resource_type == ocp.resource_type + ] + for ocp in dry_data["OCPS"]["delete"] + } deletable_resources = dry_data["RESOURCES"]["delete"] deletable_stacks = dry_data["STACKS"]["delete"] if "STACKS" in dry_data else None if deletable_vms or stopable_vms or skipped_vms: @@ -39,6 +51,7 @@ def echo_dry(dry_data=None) -> None: f"VMs:\n\tDeletable: {deletable_vms}\n\tStoppable: {stopable_vms}\n\t" f"Skip: {skipped_vms}" ) + if deletable_discs: logger.info(f"DISCs:\n\tDeletable: {deletable_discs}") if deletable_nics: @@ -47,6 +60,8 @@ def echo_dry(dry_data=None) -> None: logger.info(f"IMAGES:\n\tDeletable: {deletable_images}") if deletable_pips: logger.info(f"PIPs:\n\tDeletable: {deletable_pips}") + if deletable_ocps: + logger.info(f"OCPs:\n\tDeletable: {deletable_ocps}") if deletable_resources: logger.info(f"RESOURCEs:\n\tDeletable: {deletable_resources}") if deletable_stacks: @@ -61,6 +76,7 @@ def echo_dry(dry_data=None) -> None: deletable_resources, deletable_stacks, deletable_images, + deletable_ocps, ] ): logger.info("\nNo resources are eligible for cleanup!") @@ -112,3 +128,77 @@ def gce_zones() -> list: _zones_combo = {**_bcds, **_abcfs, **_abcs} zones = [f"{loc}-{zone}" for loc, zones in _zones_combo.items() for zone in zones] return zones + + +def group_ocps_by_cluster(resources: list = None) -> dict: + """Group different types of AWS resources under their original OCP clusters + :param list resources: AWS resources collected by defined region and sla + :return: A dictionary with the clusters as keys and the associated resources as values + """ + if resources is None: + resources = [] + clusters_map = {} + + for resource in resources: + for key in resource.get_tags(regex=OCP_TAG_SUBSTR): + cluster_name = key.get("Key") + if OCP_TAG_SUBSTR in cluster_name: + cluster_name = cluster_name.split(OCP_TAG_SUBSTR)[1] + if cluster_name not in clusters_map.keys(): + clusters_map[cluster_name] = {"Resources": [], "Instances": []} + + # Set cluster's EC2 instances + if hasattr(resource, 'ec2_instance'): + clusters_map[cluster_name]["Instances"].append(resource) + # Set resource under cluster + else: + clusters_map[cluster_name]["Resources"].append(resource) + return clusters_map + + +def calculate_time_threshold(time_ref=""): + """Parses a time reference for data filtering + :param str time_ref: a relative time reference for indicating the filter value + of a relative time, given in a {time_value}{time_unit} format; default is "" (no filtering) + :return datetime time_threshold + """ + if time_ref is None: + time_ref = "" + + if time_ref.isnumeric(): + # Use default time value as Minutes + time_ref += "m" + + # Time Ref is Optional; if empty, time_threshold will be set as "now" + time_threshold = dateparser.parse(f"now-{time_ref}-UTC") + logger.debug( + f"\nAssociated OCP resources are filtered by last creation time of: {time_threshold}" + ) + return time_threshold + + +def filter_resources_by_time_modified( + time_threshold, + resources: list[ResourceExplorerResource] = None, +) -> list: + """ + Filter list of AWS resources by checking modification date ("LastReportedAt") + :param datetime time_threshold: Time filtering criteria + :param list resources: List of resources to be filtered out + :return: list of resources that last modified before time threshold + :Example: + Use the time_ref "1h" to collect resources that exist for more than an hour + """ + filtered_resources = [] + + for resource in resources: + # Will not collect resources recorded during the SLA time + if resource.date_modified > time_threshold: + continue + filtered_resources.append(resource) + return filtered_resources + + +def delete_ocp(ocp): + # WIP: add support for deletion + pass diff --git a/settings.yaml.template b/settings.yaml.template index c3ed96f3b..a139bd643 100644 --- a/settings.yaml.template +++ b/settings.yaml.template @@ -97,6 +97,11 @@ AWS: DELETE_STACK: 'test' # Number of minutes the deletable CloudFormation should be allowed to live, e.g 120 minutes = 2 Hours SLA_MINUTES: 120 + OCPS: + OCP_CLIENT_REGION: "us-east-1" + # Specified as {time_value}{time_unit} format, e.g. "7d" = 7 Days + # If a time unit is not specified (the value is numeric), it will be considered as Minutes + SLA: 7d EXCEPTIONS: VM: # VM names that would be skipped from cleanup