Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop: Rebase for OCP resource cleanup on AWS #137

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cloudwash/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,13 @@ def azure(ctx, vms, discs, nics, images, pips, _all, _all_rg):
@click.option("--images", is_flag=True, help="Remove only images from the provider")
@click.option("--pips", is_flag=True, help="Remove only Public IPs from the provider")
@click.option("--stacks", is_flag=True, help="Remove only CloudFormations from the provider")
@click.option(
"--ocps",
is_flag=True,
help="Remove only unused OCP Cluster occupied resources from the provider",
)
@click.pass_context
def aws(ctx, vms, discs, nics, images, pips, stacks, _all):
def aws(ctx, vms, discs, nics, images, pips, stacks, ocps, _all):
# Validate Amazon Settings
validate_provider(ctx.command.name)
is_dry_run = ctx.parent.params["dry"]
Expand All @@ -111,6 +116,7 @@ def aws(ctx, vms, discs, nics, images, pips, stacks, _all):
images=images,
pips=pips,
stacks=stacks,
ocps=ocps,
_all=_all,
dry_run=is_dry_run,
)
Expand Down
2 changes: 1 addition & 1 deletion cloudwash/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
aws_data = ['VMS', 'NICS', 'DISCS', 'PIPS', 'RESOURCES', 'STACKS']
aws_data = ['VMS', 'NICS', 'DISCS', 'PIPS', 'RESOURCES', 'STACKS', 'OCPS']
azure_data = ['VMS', 'NICS', 'DISCS', 'IMAGES', 'PIPS', 'RESOURCES']
gce_data = ['VMS', 'NICS', 'DISCS']
container_data = ['CONTAINERS']
9 changes: 9 additions & 0 deletions cloudwash/entities/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from cloudwash.entities.resources.images import CleanAzureImages
from cloudwash.entities.resources.nics import CleanAWSNics
from cloudwash.entities.resources.nics import CleanAzureNics
from cloudwash.entities.resources.ocps import CleanAWSOcps
from cloudwash.entities.resources.pips import CleanAWSPips
from cloudwash.entities.resources.pips import CleanAzurePips
from cloudwash.entities.resources.stacks import CleanAWSStacks
Expand All @@ -17,6 +18,14 @@ class providerCleanup:
def __init__(self, client):
self.client = client

@property
def ocps(self):
    """Return the OCP cleanup implementation for the current provider.

    :raises NotImplementedError: when the provider has no OCP cleanup support
    """
    provider_name = self.__class__.__name__
    # Only AWS currently implements OCP cluster cleanup; fail fast otherwise.
    if 'AWS' not in provider_name:
        raise NotImplementedError(f'The OCPs cleanup on {provider_name} is not implemented')
    return CleanAWSOcps(client=self.client)

@property
def vms(self):
providerclass = self.__class__.__name__
Expand Down
18 changes: 18 additions & 0 deletions cloudwash/entities/resources/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@ def _set_dry(self):
pass


class OCPsCleanup(ResourceCleanup):
    """Abstract contract for OCP cluster resource cleanup implementations."""

    @abstractmethod
    def list(self):
        """Collect the OCP resources that are eligible for cleanup."""

    @abstractmethod
    def cleanup(self):
        """Run the cleanup flow (honoring dry-run configuration)."""

    @abstractmethod
    def remove(self):
        """Delete the collected resources from the provider."""

    @abstractmethod
    def _set_dry(self):
        """Record the collected resources in the dry-run summary."""


class DiscsCleanup(ResourceCleanup):
@abstractmethod
def list(self):
Expand Down
58 changes: 58 additions & 0 deletions cloudwash/entities/resources/ocps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from cloudwash.config import settings
from cloudwash.entities.resources.base import OCPsCleanup
from cloudwash.utils import calculate_time_threshold
from cloudwash.utils import dry_data
from cloudwash.utils import filter_resources_by_time_modified
from cloudwash.utils import group_ocps_by_cluster
from cloudwash.utils import OCP_TAG_SUBSTR


class CleanOCPs(OCPsCleanup):
    """Base OCP cleanup: wires together collection, dry-run reporting and removal."""

    def __init__(self, client):
        self.client = client
        # Resources queued for deletion; populated by list() below.
        self._delete = []
        self.list()

    def list(self):
        """Provider subclasses override this to collect deletable resources."""
        pass

    def remove(self):
        """Provider subclasses override this to perform the actual deletion."""
        pass

    def cleanup(self):
        # Only mutate cloud state when this is not a dry run.
        if not settings.dry_run:
            self.remove()

    def _set_dry(self):
        # Publish the queued resources into the shared dry-run summary.
        dry_data['OCPS']['delete'] = self._delete


class CleanAWSOcps(CleanOCPs):
    """AWS-specific collection of OCP cluster leftovers for cleanup."""

    def list(self):
        """Collect OCP-tagged resources in the cleaning region, filtered by SLA.

        Resources are grouped per cluster; a cluster's resources are queued for
        deletion either when its EC2 instances fail the SLA filter, or — when it
        has no EC2 instances at all — when the resources themselves pass it.
        """
        time_threshold = calculate_time_threshold(time_ref=settings.aws.criteria.ocps.sla)

        # Query every resource tagged as belonging to an OCP cluster in the region
        query = " ".join([f"tag.key:{OCP_TAG_SUBSTR}*", f"region:{self.client.cleaning_region}"])
        resources = self.client.list_resources(query=query)

        # Prepare resources to be filtered before deletion
        cluster_map = group_ocps_by_cluster(resources=resources)
        # Iterate the grouped values directly instead of keys + re-indexing
        for cluster in cluster_map.values():
            cluster_resources = cluster.get("Resources")
            instances = cluster.get("Instances")

            if instances:
                # For resources with associated EC2 Instances, filter by Instances SLA.
                # NOTE(review): resources are deleted when NO instance predates the
                # SLA threshold — confirm this polarity matches the intended
                # "LastReportedAt" semantics of Resource Explorer.
                if not filter_resources_by_time_modified(
                    time_threshold,
                    resources=instances,
                ):
                    self._delete.extend(cluster_resources)
            else:
                # For resources with no associated EC2 Instances, identify as leftovers
                self._delete.extend(
                    filter_resources_by_time_modified(time_threshold, resources=cluster_resources)
                )

        # Sort resources by type so the dry-run summary groups them together
        self._delete = sorted(self._delete, key=lambda res: res.resource_type)
        self._set_dry()
65 changes: 41 additions & 24 deletions cloudwash/providers/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,44 @@ def cleanup(**kwargs):
is_dry_run = kwargs.get("dry_run", False)
jyejare marked this conversation as resolved.
Show resolved Hide resolved
dry_data['PROVIDER'] = "AWS"
regions = settings.aws.auth.regions
if "all" in regions:
with compute_client("aws", aws_region="us-west-2") as client:
regions = client.list_regions()
for region in regions:
for items in data:
dry_data[items]['delete'] = []
with compute_client("aws", aws_region=region) as aws_client:
awscleanup = AWSCleanup(client=aws_client)
# Actual Cleaning and dry execution
logger.info(f"\nResources from the region: {region}")
if kwargs["vms"] or kwargs["_all"]:
awscleanup.vms.cleanup()
if kwargs["nics"] or kwargs["_all"]:
awscleanup.nics.cleanup()
if kwargs["discs"] or kwargs["_all"]:
awscleanup.discs.cleanup()
if kwargs["pips"] or kwargs["_all"]:
awscleanup.pips.cleanup()
if kwargs["images"] or kwargs["_all"]:
awscleanup.images.cleanup()
if kwargs["stacks"] or kwargs["_all"]:
awscleanup.stacks.cleanup()
if is_dry_run:
echo_dry(dry_data)
if kwargs["ocps"]:
aws_client_region = settings.aws.criteria.ocps.ocp_client_region
with compute_client("aws", aws_region=aws_client_region) as aws_ocp_client:
if "all" in regions:
regions = aws_ocp_client.list_regions()
awscleanup = AWSCleanup(client=aws_ocp_client)
for region in regions:
aws_ocp_client.cleaning_region = region
# Emptying the dry data for previous region everytime
for items in data:
dry_data[items]['delete'] = []
logger.info(f"\nResources from the region: {region}")
awscleanup.ocps.cleanup()
if is_dry_run:
echo_dry(dry_data)
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might not want to use else since the cleanup can be set to execute OCPs and other resources as well (or use kwargs["_all"])

Copy link
Collaborator Author

@jyejare jyejare Oct 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you see a scenario where OCPs and other resources in the list would be called for cleanup at the same time?

If yes, we should rather say ocp-clusters to be more specific, so it would look like a resource type within the AWS provider.

I thought it was good to keep the OCP cluster cleanup separate from the other resources' cleanup, as the cluster cleanup will clean up whatever it is supposed to anyway.

Thoughts ?

if "all" in regions:
with compute_client("aws", aws_region="us-west-2") as client:
regions = client.list_regions()
for region in regions:
# Emptying the dry data for previous region everytime
for items in data:
dry_data[items]['delete'] = []
with compute_client("aws", aws_region=region) as aws_client:
awscleanup = AWSCleanup(client=aws_client)
# Actual Cleaning and dry execution
logger.info(f"\nResources from the region: {region}")
if kwargs["vms"] or kwargs["_all"]:
awscleanup.vms.cleanup()
if kwargs["nics"] or kwargs["_all"]:
awscleanup.nics.cleanup()
if kwargs["discs"] or kwargs["_all"]:
awscleanup.discs.cleanup()
if kwargs["pips"] or kwargs["_all"]:
awscleanup.pips.cleanup()
if kwargs["images"] or kwargs["_all"]:
awscleanup.images.cleanup()
if kwargs["stacks"] or kwargs["_all"]:
awscleanup.stacks.cleanup()
if is_dry_run:
echo_dry(dry_data)
Comment on lines +54 to +55
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Following the last comment, since the cleanup can be executed on multiple resources (OCPs and others), I think the if is_dry_run: component should be outside both sections and appear once only, post all resources cleanup executions.

Suggested change
if is_dry_run:
echo_dry(dry_data)
if is_dry_run:
echo_dry(dry_data)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dry-run printing here is on a per-region basis, hence having it within the loop makes sense.

If we move it to the end of the loop, it will print resources from the last region only, which would be buggy behavior.

86 changes: 86 additions & 0 deletions cloudwash/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,28 @@
from dominate.tags import td
from dominate.tags import tr
from dominate.util import raw
import dateparser
from wrapanapi.systems.ec2 import ResourceExplorerResource

from cloudwash.assets import css
from cloudwash.logger import logger

# Tag-key prefix marking AWS resources as owned by an OCP (OpenShift) cluster
OCP_TAG_SUBSTR = "kubernetes.io/cluster/"

# VMs and containers track three buckets (delete/stop/skip), unlike the other
# resource types below, which only track deletions.
_vms_dict = {"VMS": {"delete": [], "stop": [], "skip": []}}
_containers_dict = {"CONTAINERS": {"delete": [], "stop": [], "skip": []}}

# Module-scoped accumulator used for dry-run reporting, keyed by resource type.
# Providers reset and refill the "delete" lists per region before echoing.
dry_data = {
    "NICS": {"delete": []},
    "DISCS": {"delete": []},
    "PIPS": {"delete": []},
    "OCPS": {"delete": []},
    "RESOURCES": {"delete": []},
    "STACKS": {"delete": []},
    "IMAGES": {"delete": []},
    "PROVIDER": "",
}

dry_data.update(_vms_dict)
dry_data.update(_containers_dict)

Expand All @@ -41,6 +47,7 @@ def echo_dry(dry_data=None) -> None:
it follows the format of module scoped `dry_data` variable in this module
"""
logger.info("\n=========== DRY SUMMARY ============\n")

resource_data = {
"provider": dry_data.get('PROVIDER'),
"deletable_vms": dry_data["VMS"]["delete"],
Expand All @@ -55,6 +62,11 @@ def echo_dry(dry_data=None) -> None:
"deletable_pips": dry_data["PIPS"]["delete"] if "PIPS" in dry_data else None,
"deletable_resources": dry_data["RESOURCES"]["delete"],
"deletable_stacks": dry_data["STACKS"]["delete"] if "STACKS" in dry_data else None,
"deletable_ocps": {
ocp.resource_type: [
r.name for r in dry_data["OCPS"]["delete"] if r.resource_type == ocp.resource_type
] for ocp in dry_data["OCPS"]["delete"]
}
}

# Group the same resource type under the same section for logging
Expand Down Expand Up @@ -155,3 +167,77 @@ def gce_zones() -> list:
_zones_combo = {**_bcds, **_abcfs, **_abcs}
zones = [f"{loc}-{zone}" for loc, zones in _zones_combo.items() for zone in zones]
return zones


def group_ocps_by_cluster(resources: list = None) -> dict:
    """Group different types of AWS resources under their original OCP clusters.

    :param list resources: AWS resources collected by defined region and sla;
        default None is treated as an empty collection
    :return: A dictionary with the clusters as keys and the associated resources as values
    """
    clusters_map = {}

    for resource in resources or []:
        # get_tags is pre-filtered by the OCP regex; each tag is a {"Key": ...} dict
        for tag in resource.get_tags(regex=OCP_TAG_SUBSTR):
            cluster_name = tag.get("Key")
            # Strip the OCP prefix to recover the bare cluster name
            if OCP_TAG_SUBSTR in cluster_name:
                cluster_name = cluster_name.split(OCP_TAG_SUBSTR)[1]
            # setdefault replaces the membership-check-then-assign pattern
            cluster = clusters_map.setdefault(cluster_name, {"Resources": [], "Instances": []})

            # EC2 instances are tracked separately: their SLA decides the
            # fate of the whole cluster's resources
            if hasattr(resource, 'ec2_instance'):
                cluster["Instances"].append(resource)
            else:
                cluster["Resources"].append(resource)
    return clusters_map


def calculate_time_threshold(time_ref=""):
    """Parses a time reference for data filtering

    :param str time_ref: a relative time reference for indicating the filter value
        of a relative time, given in a {time_value}{time_unit} format; default is "" (no filtering)
    :return datetime time_threshold
    """
    if time_ref is None:
        time_ref = ""

    # A bare number is interpreted as minutes by default
    if time_ref.isnumeric():
        time_ref = f"{time_ref}m"

    # Time Ref is Optional; if empty, time_threshold will be set as "now"
    time_threshold = dateparser.parse(f"now-{time_ref}-UTC")
    logger.debug(
        f"\nAssociated OCP resources are filtered by last creation time of: {time_threshold}"
    )
    return time_threshold


def filter_resources_by_time_modified(
    time_threshold,
    resources: "list[ResourceExplorerResource] | None" = None,
) -> list:
    """
    Filter list of AWS resources by checking modification date ("LastReportedAt")

    :param datetime time_threshold: Time filtering criteria
    :param list resources: List of resources to be filtered; default None yields []
    :return: list of resources that were last modified before the time threshold
    :Example:
        Use the time_ref "1h" to collect resources that exist for more than an hour
    """
    if resources is None:
        # Guard: iterating the original None default raised TypeError
        resources = []

    # Keep only resources NOT recorded during the SLA window
    return [
        resource for resource in resources if resource.date_modified <= time_threshold
    ]


def delete_ocp(ocp):
    """Delete the given OCP cluster resource.

    WIP: deletion support is not implemented yet; this is a no-op placeholder.
    """
5 changes: 5 additions & 0 deletions settings.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ AWS:
DELETE_STACK: 'test'
# Number of minutes the deletable CloudFormation should be allowed to live, e.g 120 minutes = 2 Hours
SLA_MINUTES: 120
OCPS:
OCP_CLIENT_REGION: "us-east-1"
# Specified as {time_value}{time_unit} format, e.g. "7d" = 7 Days
# If a time unit is not specified (the value is numeric), it will be considered as Minutes
SLA: 7d
EXCEPTIONS:
VM:
# VM names that would be skipped from cleanup
Expand Down
Loading