diff --git a/jenkins/migrationIntegPipelines/cleanupDeploymentCover.groovy b/jenkins/migrationIntegPipelines/cleanupDeploymentCover.groovy new file mode 100644 index 000000000..97f599d3d --- /dev/null +++ b/jenkins/migrationIntegPipelines/cleanupDeploymentCover.groovy @@ -0,0 +1,9 @@ +def gitBranch = params.GIT_BRANCH ?: 'main' +def gitUrl = params.GIT_REPO_URL ?: 'https://github.com/opensearch-project/opensearch-migrations.git' + +library identifier: "migrations-lib@${gitBranch}", retriever: modernSCM( + [$class: 'GitSCMSource', + remote: "${gitUrl}"]) + +// Shared library function (location from root: vars/cleanupDeployment.groovy) +cleanupDeployment() diff --git a/test/cleanupDeployment/Pipfile b/test/cleanupDeployment/Pipfile new file mode 100644 index 000000000..f4f7520d0 --- /dev/null +++ b/test/cleanupDeployment/Pipfile @@ -0,0 +1,11 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +boto3 = "*" +argparse = "*" + +[requires] +python_version = "3.11" diff --git a/test/cleanupDeployment/Pipfile.lock b/test/cleanupDeployment/Pipfile.lock new file mode 100644 index 000000000..22e26b2c8 --- /dev/null +++ b/test/cleanupDeployment/Pipfile.lock @@ -0,0 +1,86 @@ +{ + "_meta": { + "hash": { + "sha256": "d908e3cc0ea0b41313abda9d8934ed9d9be20580cadb01f96330d9fc0d9596c3" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.11" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "argparse": { + "hashes": [ + "sha256:62b089a55be1d8949cd2bc7e0df0bddb9e028faefc8c32038cc84862aefdd6e4", + "sha256:c31647edb69fd3d465a847ea3157d37bed1f95f19760b11a47aa91c04b666314" + ], + "index": "pypi", + "version": "==1.4.0" + }, + "boto3": { + "hashes": [ + "sha256:72eb73d90448632d7388644388be6977293ccb8fbfefd5fd39d7e75ff2d48f8a", + "sha256:73d4f22b57a725f0e8a6e0c4b2d16336c128e39f3189c24f9e513daa7c14936b" + ], + "index": "pypi", + "markers": "python_version >= 
'3.8'", + "version": "==1.35.35" + }, + "botocore": { + "hashes": [ + "sha256:44b843e310b6338c3086908928709c7a303a2bb0326ea3c93ece5ac5afafb6c8", + "sha256:899d303046391caa1d05093a673e52d02185b37bc64bd78771ad6752167a25ab" + ], + "markers": "python_version >= '3.8'", + "version": "==1.35.35" + }, + "jmespath": { + "hashes": [ + "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", + "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, + "python-dateutil": { + "hashes": [ + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "version": "==2.9.0.post0" + }, + "s3transfer": { + "hashes": [ + "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6", + "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69" + ], + "markers": "python_version >= '3.8'", + "version": "==0.10.2" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "version": "==1.16.0" + }, + "urllib3": { + "hashes": [ + "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", + "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9" + ], + "markers": "python_version >= '3.10'", + "version": "==2.2.3" + } + }, + "develop": {} +} diff --git a/test/cleanupDeployment/README.md b/test/cleanupDeployment/README.md new file mode 100644 index 000000000..1dc95fe96 --- /dev/null +++ b/test/cleanupDeployment/README.md @@ -0,0 +1,20 @@ +# Cleanup Deployment + +Utility tool for removing deployed resources + +### Running Tool + +If pipenv is 
not installed, install with below
+```shell
+python3 -m pip install --upgrade pipenv
+```
+
+Install dependencies
+```shell
+pipenv install --deploy
+```
+
+Run clean deployment
+```shell
+pipenv run python3 cleanup_deployment.py --stage rfs-integ1
+```
diff --git a/test/cleanupDeployment/cleanup_deployment.py b/test/cleanupDeployment/cleanup_deployment.py
new file mode 100644
index 000000000..634b576fa
--- /dev/null
+++ b/test/cleanupDeployment/cleanup_deployment.py
@@ -0,0 +1,164 @@
+import argparse
+import boto3
+import logging
+import re
+import time
+from typing import List
+
+from botocore.exceptions import ClientError
+
+logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+INDEPENDENT_STACKS = ['MigrationConsole', 'ReindexFromSnapshot', 'TrafficReplayer', 'TargetClusterProxy',
+                      'CaptureProxy', 'KafkaBroker', 'OpenSearchContainer', 'CaptureProxyES', 'Elasticsearch']
+CORE_STACKS_ORDERED = ['MigrationInfra', 'OpenSearchDomain', 'NetworkInfra', 'infra-stack', 'network-stack']
+CFN_INITIAL_STATUS_SKIP = ['DELETE_IN_PROGRESS', 'DELETE_COMPLETE']
+MAX_DELETE_STACK_RETRIES = 3
+MAX_WAIT_MINUTES = 45
+WAIT_INTERVAL_SECONDS = 15
+
+
+class DeleteStackFailure(Exception):
+    pass
+
+
+class DeleteStackTimeout(Exception):
+    pass
+
+
+class StackDeletionRequest:
+    def __init__(self, stack_name):
+        self.stack_name = stack_name
+        self.retry_count = 0
+
+
+def delete_stack(cfn_client, stack_name: str) -> StackDeletionRequest:
+    try:
+        describe_stack_response = cfn_client.describe_stacks(StackName=stack_name)
+        stack_status = describe_stack_response['Stacks'][0]['StackStatus']
+    except ClientError as client_error:
+        if 'does not exist' in client_error.response['Error']['Message']:
+            logger.warning(f"Stack {stack_name} no longer exists, skipping its deletion")
+            return StackDeletionRequest(stack_name=stack_name)
+        else:
+            raise client_error
+    if 'IN_PROGRESS' in stack_status:
+        logger.warning(f"Unexpected status: {stack_status} for {stack_name} when preparing to delete stack")
+    logger.info(f"Deleting stack: {stack_name}")
+    cfn_client.delete_stack(StackName=stack_name)
+    return StackDeletionRequest(stack_name=stack_name)
+
+
+def retry_delete_stack(cfn_client, deletion_request: StackDeletionRequest):
+    if deletion_request.retry_count >= MAX_DELETE_STACK_RETRIES:
+        raise DeleteStackFailure(f"Max attempts of {MAX_DELETE_STACK_RETRIES} have failed to delete "
+                                 f"stack: {deletion_request.stack_name}. Please see CFN stack logs for more details")
+    logger.info(f"Retry attempt {deletion_request.retry_count + 1} of {MAX_DELETE_STACK_RETRIES} for "
+                f"stack: {deletion_request.stack_name}")
+    delete_stack(cfn_client=cfn_client, stack_name=deletion_request.stack_name)
+    deletion_request.retry_count += 1
+    return deletion_request
+
+
+def wait_for_stack_deletion(cfn_client, stack_delete_requests: List[StackDeletionRequest]):
+    wait_time_seconds = 0
+    remaining_requests = stack_delete_requests[:]
+
+    while remaining_requests and wait_time_seconds < (MAX_WAIT_MINUTES * 60):
+        # Temporary list for stacks that are still being deleted
+        in_progress_requests = []
+
+        for delete_request in remaining_requests:
+            stack_status = ""
+            try:
+                describe_stack_response = cfn_client.describe_stacks(StackName=delete_request.stack_name)
+                stack_status = describe_stack_response['Stacks'][0].get('StackStatus')
+            except ClientError as client_error:
+                if 'does not exist' in client_error.response['Error']['Message']:
+                    continue
+
+            if stack_status == 'DELETE_COMPLETE':
+                logger.info(f"Stack {delete_request.stack_name} deletion completed.")
+            elif stack_status == 'DELETE_FAILED':
+                logger.error(f"Stack {delete_request.stack_name} deletion failed, retrying...")
+                retry_delete_stack(cfn_client=cfn_client, deletion_request=delete_request)
+                in_progress_requests.append(delete_request)  # Keep for further checks after retry
+            elif stack_status == 'DELETE_IN_PROGRESS':
+                logger.info(f"Stack {delete_request.stack_name} is currently DELETE_IN_PROGRESS.")
+                in_progress_requests.append(delete_request)  # Still in progress
+            else:
+                logger.warning(f"Unexpected status: {stack_status} for stack: {delete_request.stack_name}")
+                in_progress_requests.append(delete_request)  # Unexpected status but still in progress
+
+        remaining_requests = in_progress_requests
+        if remaining_requests:
+            logger.info(f"Waiting for the following stacks: {[r.stack_name for r in remaining_requests]}")
+
+        time.sleep(WAIT_INTERVAL_SECONDS)
+        wait_time_seconds += WAIT_INTERVAL_SECONDS
+
+    if remaining_requests:
+        raise DeleteStackTimeout(f"Timeout reached. The following stacks were still in "
+                                 f"progress: {[r.stack_name for r in remaining_requests]}")
+    else:
+        logger.info(f"The following stacks have been deleted "
+                    f"successfully: {[s.stack_name for s in stack_delete_requests]}")
+
+
+def delete_stacks(cfn_client, stack_names):
+    # Delete independent stacks in batch
+    independent_stack_delete_requests = [
+        delete_stack(cfn_client, stack_name)
+        for stack_name in stack_names
+        if any(stack_id in stack_name for stack_id in INDEPENDENT_STACKS)
+    ]
+    if independent_stack_delete_requests:
+        wait_for_stack_deletion(cfn_client=cfn_client, stack_delete_requests=independent_stack_delete_requests)
+
+    # Delete core stacks in order, and batch for a particular stack type
+    for core_id in CORE_STACKS_ORDERED:
+        core_delete_requests = []
+        matching_stacks = [s for s in stack_names if core_id in s]
+        for stack in matching_stacks:
+            core_delete_requests.append(delete_stack(cfn_client, stack))
+        if core_delete_requests:
+            wait_for_stack_deletion(cfn_client=cfn_client, stack_delete_requests=core_delete_requests)
+
+
+def delete_stacks_for_environment(stage_name: str):
+    client = boto3.client('cloudformation')
+    list_stacks_response = client.list_stacks()
+    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation/client/list_stacks.html
+    stack_names = [stack['StackName'] for stack in list_stacks_response['StackSummaries']
+                   if stack['StackStatus'] not in CFN_INITIAL_STATUS_SKIP]
+    next_token = list_stacks_response.get("NextToken", None)
+    # If list stacks response is paginated, continue till all stacks are retrieved
+    while next_token is not None:
+        next_list_stacks_response = client.list_stacks(NextToken=next_token)
+        next_stack_names = [stack['StackName'] for stack in next_list_stacks_response['StackSummaries']
+                            if stack['StackStatus'] not in CFN_INITIAL_STATUS_SKIP]
+        stack_names.extend(next_stack_names)
+        next_token = next_list_stacks_response.get("NextToken", None)
+
+    stage_stack_names = []
+    for name in stack_names:
+        # Add stack that has stage name in middle(-stage-) or at end(-stage) of stack name
+        if re.match(rf".*-{stage_name}-.*|.*-{stage_name}$", name):
+            stage_stack_names.append(name)
+    logger.info(f"Collected the following stacks to delete: {stage_stack_names}")
+    delete_stacks(cfn_client=client, stack_names=stage_stack_names)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Cleanup an opensearch-migrations deployment environment.")
+    parser.add_argument("--stage", type=str, help="The deployment stage environment to delete")
+    args = parser.parse_args()
+
+    start_time = time.time()
+    delete_stacks_for_environment(args.stage)
+    print(f"Total running time: {time.time() - start_time} seconds")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vars/cleanupDeployment.groovy b/vars/cleanupDeployment.groovy
new file mode 100644
index 000000000..5a1f2d52a
--- /dev/null
+++ b/vars/cleanupDeployment.groovy
@@ -0,0 +1,47 @@
+def call(Map config = [:]) {
+
+    pipeline {
+        agent { label config.workerAgent ?: 'Jenkins-Default-Agent-X64-C5xlarge-Single-Host' }
+
+        parameters {
+            string(name: 'GIT_REPO_URL', defaultValue: 'https://github.com/opensearch-project/opensearch-migrations.git', description: 'Git repository url')
+            string(name: 'GIT_BRANCH', defaultValue: 'main', description: 'Git branch to use for repository')
+            string(name: 'STAGE', description: 'Deployment stage name in group to delete (e.g. rfs-integ1)')
+        }
+
+        options {
+            // Acquire lock on a given deployment stage
+            lock(resource: params.STAGE, variable: 'stage')
+            timeout(time: 1, unit: 'HOURS')
+            buildDiscarder(logRotator(daysToKeepStr: '30'))
+        }
+
+        stages {
+            stage('Checkout') {
+                steps {
+                    script {
+                        git branch: "${params.GIT_BRANCH}", url: "${params.GIT_REPO_URL}"
+                    }
+                }
+            }
+
+            stage('Cleanup Deployment') {
+                steps {
+                    timeout(time: 1, unit: 'HOURS') {
+                        dir('test/cleanupDeployment') {
+                            script {
+                                sh "sudo --preserve-env pipenv install --deploy --ignore-pipfile"
+                                def command = "pipenv run python3 cleanup_deployment.py --stage ${stage}"
+                                withCredentials([string(credentialsId: 'migrations-test-account-id', variable: 'MIGRATIONS_TEST_ACCOUNT_ID')]) {
+                                    withAWS(role: 'JenkinsDeploymentRole', roleAccount: "${MIGRATIONS_TEST_ACCOUNT_ID}", region: "us-east-1", duration: 3600, roleSessionName: 'jenkins-session') {
+                                        sh "sudo --preserve-env ${command}"
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}