Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Iterate through all clusters to find instance #25

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 53 additions & 41 deletions cform/ecs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,8 @@ Resources:
- logs:CreateLogGroup
- logs:CreateLogStream
- logs:PutLogEvents
- ecs:ListClusters
- ecs:DescribeClusters
- ecs:ListContainerInstances
- ecs:DescribeContainerInstances
- ecs:UpdateContainerInstancesState
Expand Down Expand Up @@ -422,72 +424,82 @@ Resources:
import time
import boto3

CLUSTER = '${EcsClusterName}'
REGION = '${AWS::Region}'
REGION = '${AWS::Region}'

ECS = boto3.client('ecs', region_name=REGION)
ASG = boto3.client('autoscaling', region_name=REGION)
SNS = boto3.client('sns', region_name=REGION)

def find_ecs_instance_info(instance_id):
paginator = ECS.get_paginator('list_container_instances')
for list_resp in paginator.paginate(cluster=CLUSTER):
arns = list_resp['containerInstanceArns']
desc_resp = ECS.describe_container_instances(cluster=CLUSTER,
containerInstances=arns)
for container_instance in desc_resp['containerInstances']:
if container_instance['ec2InstanceId'] != instance_id:
continue

print('Found instance: id=%s, arn=%s, status=%s, runningTasksCount=%s' %
(instance_id, container_instance['containerInstanceArn'],
container_instance['status'], container_instance['runningTasksCount']))

return (container_instance['containerInstanceArn'],
container_instance['status'], container_instance['runningTasksCount'])

cluster_paginator = ECS.get_paginator('list_clusters')
for cluster_list_resp in cluster_paginator.paginate():
cluster_arns = cluster_list_resp['clusterArns']
cluster_desc_resp = ECS.describe_clusters(clusters=cluster_arns)
for cluster_resp in cluster_desc_resp['clusters']:
cluster = cluster_resp['clusterName']
paginator = ECS.get_paginator('list_container_instances')
for list_resp in paginator.paginate(cluster=cluster):
arns = list_resp['containerInstanceArns']
desc_resp = ECS.describe_container_instances(cluster=cluster,
containerInstances=arns)
for container_instance in desc_resp['containerInstances']:
if container_instance['ec2InstanceId'] != instance_id:
continue
print('Found instance: id=%s, arn=%s, status=%s, runningTasksCount=%s, cluster=%s' %
(
instance_id,
container_instance['containerInstanceArn'],
container_instance['status'],
container_instance['runningTasksCount'],
cluster
))
return (
container_instance['containerInstanceArn'],
container_instance['status'],
container_instance['runningTasksCount'],
cluster
)
return None, None, 0

def instance_has_running_tasks(instance_id):
(instance_arn, container_status, running_tasks) = find_ecs_instance_info(instance_id)
(instance_arn, container_status, running_tasks, cluster) = find_ecs_instance_info(instance_id)
if instance_arn is None:
print('Could not find instance ID %s. Letting autoscaling kill the instance.' %
(instance_id))
(instance_id))
return False

if container_status != 'DRAINING':
print('Setting container instance %s (%s) to DRAINING' %
(instance_id, instance_arn))
ECS.update_container_instances_state(cluster=CLUSTER,
containerInstances=[instance_arn],
status='DRAINING')

(instance_id, instance_arn))
ECS.update_container_instances_state(
cluster=cluster,
containerInstances=[instance_arn],
status='DRAINING'
)
return running_tasks > 0

def lambda_handler(event, context):
msg = json.loads(event['Records'][0]['Sns']['Message'])

if 'LifecycleTransition' not in msg.keys() or \
msg['LifecycleTransition'].find('autoscaling:EC2_INSTANCE_TERMINATING') == -1:
msg['LifecycleTransition'].find('autoscaling:EC2_INSTANCE_TERMINATING') == -1:
print('Exiting since the lifecycle transition is not EC2_INSTANCE_TERMINATING.')
return

if instance_has_running_tasks(msg['EC2InstanceId']):
print('Tasks are still running on instance %s; posting msg to SNS topic %s' %
(msg['EC2InstanceId'], event['Records'][0]['Sns']['TopicArn']))
(msg['EC2InstanceId'], event['Records'][0]['Sns']['TopicArn']))
time.sleep(5)
sns_resp = SNS.publish(TopicArn=event['Records'][0]['Sns']['TopicArn'],
Message=json.dumps(msg),
Subject='Publishing SNS msg to invoke Lambda again.')
sns_resp = SNS.publish(
TopicArn=event['Records'][0]['Sns']['TopicArn'],
Message=json.dumps(msg),
Subject='Publishing SNS msg to invoke Lambda again.'
)
print('Posted msg %s to SNS topic.' % (sns_resp['MessageId']))
else:
print('No tasks are running on instance %s; setting lifecycle to complete' %
(msg['EC2InstanceId']))

ASG.complete_lifecycle_action(LifecycleHookName=msg['LifecycleHookName'],
AutoScalingGroupName=msg['AutoScalingGroupName'],
LifecycleActionResult='CONTINUE',
InstanceId=msg['EC2InstanceId'])
(msg['EC2InstanceId']))
ASG.complete_lifecycle_action(
LifecycleHookName=msg['LifecycleHookName'],
AutoScalingGroupName=msg['AutoScalingGroupName'],
LifecycleActionResult='CONTINUE',
InstanceId=msg['EC2InstanceId']
)
LambdaInvokePermission:
Type: "AWS::Lambda::Permission"
Properties:
Expand Down