Added AWS lambda function to terminate stale citest instances
author Varac <varac@leap.se>
Sun, 18 Jun 2017 21:06:43 +0000 (23:06 +0200)
committer Varac <varac@leap.se>
Sun, 18 Jun 2017 21:06:48 +0000 (23:06 +0200)
see
https://we.riseup.net/leap+infrastructure/aws#watchdog-to-terminate-stale-ci-test-ec2-instances

for more details.

aws/lambda/terminate_citest_instances.py [new file with mode: 0755]

diff --git a/aws/lambda/terminate_citest_instances.py b/aws/lambda/terminate_citest_instances.py
new file mode 100755 (executable)
index 0000000..ddc4710
--- /dev/null
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Terminate all "citest*" instances after a given interval so they
+# don't keep running and eating into our budget
+#
+# see http://boto3.readthedocs.io/en/latest/guide/ec2-example-managing-instances.html
+# for more boto examples.
+
+import boto3
+import pprint
+from datetime import datetime, timezone, timedelta
+import re
+
+# Maximum age, in hours, a citest instance may reach before it is terminated.
+MAX_HOURS = 12
+
+
+# Pretty-printer used to dump the terminate_instances response
+pp = pprint.PrettyPrinter(indent=2)
+
+
+# MAX_HOURS expressed as a timedelta; instances launched longer ago than
+# this are terminated
+limit = timedelta(hours=MAX_HOURS)
+
+
+def _all_instances(ec2):
+    """Yield every instance dict known to the given EC2 client.
+
+    describe_instances is paginated and groups instances by reservation;
+    a reservation can hold more than one instance, so every page and every
+    instance of every reservation is walked.
+    """
+    paginator = ec2.get_paginator('describe_instances')
+    for page in paginator.paginate():
+        for reservation in page['Reservations']:
+            yield from reservation['Instances']
+
+
+def _node_name(instance):
+    """Return the value of the instance's "node_name" tag, or '' if unset."""
+    # describe_instances already returns the tags; the 'Tags' key is simply
+    # missing for untagged instances.
+    for tag in instance.get('Tags', []):
+        if tag['Key'] == 'node_name':
+            return tag['Value']
+    return ''
+
+
+def lambda_handler(event, context):
+    """Terminate all "citest*" instances that run longer than MAX_HOURS.
+
+    One status line is printed per instance. An instance is terminated when
+    its "node_name" tag starts with "citest" and it was launched more than
+    ``limit`` ago; everything else is left alone.
+
+    Args:
+        event: Not read by this function.
+        context: Not read by this function.
+    """
+    now = datetime.now(timezone.utc)
+    ec2 = boto3.client('ec2')
+
+    for instance in _all_instances(ec2):
+        instance_id = instance['InstanceId']
+
+        # 'LaunchTime': datetime.datetime(2017, 6, 17, 16, 12, 50, tzinfo=tzutc()),
+        # i.e. timezone-aware, so it can be subtracted from the aware `now`.
+        uptime = now - instance['LaunchTime']
+        node_name = _node_name(instance)
+
+        print('{}: node_name={}, uptime={}'.format(
+            instance_id, node_name, uptime), end='')
+
+        if not node_name.startswith('citest'):
+            print(', not a citest instance. Ignoring.')
+            continue
+
+        if uptime <= limit:
+            print(', running less than ' + str(MAX_HOURS) + 'h. Ignoring.')
+            continue
+
+        print(', running longer than ' + str(MAX_HOURS) + 'h. TERMINATING.')
+        response_terminate = ec2.terminate_instances(InstanceIds=[instance_id])
+        pp.pprint(response_terminate['TerminatingInstances'])
+        print()
+
+
+# Run the handler directly only when this file is executed as a script
+# (local testing). Importing the module must not trigger a run of its own,
+# otherwise the handler executes once on import and again on invocation.
+if __name__ == '__main__':
+    lambda_handler('test', 'test')