This script will:
- create a new key pair if one doesn't exist
- create a security group if one doesn't exist
- request a new spot instance
- wait for the spot request to be fulfilled
- wait for the instance to boot
- connect to it via SSH and run a script of your choice

Install
First, install the dependencies. This is for Ubuntu Server 14.04.
sudo apt-get install -y python python-pip python-dev libffi-dev libssl-dev
sudo pip install boto paramiko

Create a new file (e.g. launch-spot.py) and give it exec permissions (chmod +x launch-spot.py).
#!/usr/bin/python2.7 -u # pip install boto paramiko import argparse import boto, boto.ec2, boto.ec2.blockdevicemapping, boto.manage import paramiko import os, sys, time #boto.set_stream_logger('boto') def launch_spot_instance(id, profile, spot_wait_sleep=5, instance_wait_sleep=3): ec2 = boto.ec2.connect_to_region(profile['region']) if not 'key_pair' in profile: profile['key_pair'] = ('KP-' + id, 'KP-' + id + '.pem') try: print >> sys.stderr, 'Creating key pair...', keypair = ec2.create_key_pair('KP-' + id) keypair.save('.') print >> sys.stderr, 'created' except boto.exception.EC2ResponseError as e: if e.code == 'InvalidKeyPair.Duplicate': print >> sys.stderr, 'already exists' else: raise e if not 'security_group' in profile: try: print >> sys.stderr, 'Creating security group...', sc = ec2.create_security_group('SG-' + id, 'Security Group for ' + id) for proto, fromport, toport, ip in profile['firewall']: sc.authorize(proto, fromport, toport, ip) profile['security_group'] = (sc.id, sc.name) print >> sys.stderr, 'created' except boto.exception.EC2ResponseError as e: if e.code == 'InvalidGroup.Duplicate': print >> sys.stderr, 'already exists' sc = ec2.get_all_security_groups(groupnames=['SG-' + id])[0] profile['security_group'] = (sc.id, sc.name) else: raise e existing_requests = ec2.get_all_spot_instance_requests(filters={'launch.group-id': profile['security_group'][0], 'state': ['open', 'active']}) if existing_requests: if len(existing_requests) > 1: raise Exception('Too many existing spot requests') print >> sys.stderr, 'Reusing existing spot request' spot_req_id = existing_requests[0].id else: bdm = boto.ec2.blockdevicemapping.BlockDeviceMapping() bdm['/dev/sda1'] = boto.ec2.blockdevicemapping.BlockDeviceType(volume_type='gp2', size=profile['disk_size'], delete_on_termination=profile['disk_delete_on_termination']) bdm['/dev/sdb'] = boto.ec2.blockdevicemapping.BlockDeviceType(ephemeral_name='ephemeral0') print >> sys.stderr, 'Requesting spot instance' spot_reqs = 
ec2.request_spot_instances( price=profile['price'], image_id=profile['image_id'], instance_type=profile['type'], placement=profile['region'] + profile['availability_zone'], security_groups=[profile['security_group'][1]], key_name=profile['key_pair'][0], block_device_map=bdm) spot_req_id = spot_reqs[0].id print >> sys.stderr, 'Waiting for launch', instance_id = None spot_tag_added = False while not instance_id: spot_req = ec2.get_all_spot_instance_requests(request_ids=[spot_req_id])[0] if not spot_tag_added: spot_req.add_tag('Name', id) spot_tag_added = True if spot_req.state == 'failed': raise Exception('Spot request failed') instance_id = spot_req.instance_id if not instance_id: print >> sys.stderr, '.', time.sleep(spot_wait_sleep) print >> sys.stderr print >> sys.stderr, 'Retrieving instance by id' reservations = ec2.get_all_instances(instance_ids=[instance_id]) instance = reservations[0].instances[0] instance.add_tag('Name', id) print >> sys.stderr, 'Got instance: ' + str(instance.id) + ' [' + instance.state + ']' print >> sys.stderr, 'Waiting for instance to boot', while not instance.state in ['running', 'terminated', 'shutting-down']: print >> sys.stderr, '.', time.sleep(instance_wait_sleep) instance.update() print >> sys.stderr if instance.state != 'running': raise Exception('Instance was terminated') return instance def connect_to_instance(ip, username, key_filename, timeout=10): print >> sys.stderr, 'Connecting to SSH [' + ip + '] ', client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) retries = 0 while retries < 30: try: print >> sys.stderr, '.', client.connect(ip, username=username, key_filename=key_filename, timeout=timeout) break except: retries += 1 print >> sys.stderr return client def setup_instance(id, instance, file, user_name, key_name): script = open(file, 'r').read().replace('\r', '') client = connect_to_instance(instance.ip_address, user_name, key_name) session = client.get_transport().open_session() 
session.set_combine_stderr(True) print >> sys.stderr, 'Running script: ' + os.path.relpath(file, os.getcwd()) session.exec_command(script) stdout = session.makefile() try: for line in stdout: print line.rstrip() except (KeyboardInterrupt, SystemExit): print >> sys.stderr, 'Ctrl-C, stopping' client.close() exit_code = session.recv_exit_status() print >> sys.stderr, 'Exit code: ' + str(exit_code) return exit_code == 0 if __name__ == '__main__': profiles = { '15G': { 'region': 'eu-west-1', 'availability_zone': 'a', 'price': '0.05', 'type': 'r3.large', 'image_id': 'ami-ed82e39e', 'username': 'ubuntu', #'key_pair': ('AWS-EU', 'eu-key.pem'), 'disk_size': 20, 'disk_delete_on_termination': True, 'scripts': [], 'firewall': [ ('tcp', 22, 22, '0.0.0.0/0') ] } } parser = argparse.ArgumentParser(description='Launch spot instance') parser.add_argument('-n', '--name', help='Name', required=True) parser.add_argument('-p', '--profile', help='Profile', default=profiles.keys()[0], choices=profiles.keys()) parser.add_argument('-s', '--script', help='Script path', action='append', default=[]) parser.add_argument('-i', '--interactive', help='Connect to SSH', action='store_true') args = parser.parse_args() profile = profiles[args.profile] try: instance = launch_spot_instance(args.name, profile) except boto.exception.NoAuthHandlerFound: print >> sys.stderr, 'Error: No credentials found, try setting the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables' sys.exit(1) for script in profile['scripts'] + args.script: if not setup_instance(id=args.name, instance=instance, file=script, user_name=profile['username'], key_name=profile['key_pair'][1]): break if args.interactive: print 'ssh ' + profile['username'] + '@' + instance.ip_address + ' -i ' + profile['key_pair'][1] + ' -oStrictHostKeyChecking=no'The script is less than 200 lines long and should be readable from top to bottom.
Use
Set your Amazon AWS access keys as environment variables.
Or you can read the boto documentation to find out how to store the keys in configuration files.
export AWS_ACCESS_KEY_ID="XXXXXXXXXXXXXXXXXXXX"
export AWS_SECRET_ACCESS_KEY="XXXXXXXXXXXXXXXXXXXXXXXXXX"

Then launch a new spot instance like this:
$ ./launch-spot.py -n test -p 15G
Creating key pair... created
Creating security group... created
Requesting spot instance
Waiting for launch . . . . . .
Retrieving instance by id
Got instance: i-15fc09d5 [pending]
Waiting for instance to boot . . . .

-n stands for --name and it is how you can identify this spot instance.
-p stands for --profile and is a collection of settings for launching this instance (region, availability zone, instance type, max spot price, AMI, SSH username, disk size, security group rules, etc.)
The profiles are hard coded in the script. Feel free to modify the script to load them from an external configuration file (in JSON perhaps).
Currently, there is just one profile named 15G
profiles = {
    '15G': {
        'region': 'eu-west-1',
        'availability_zone': 'a',
        'price': '0.05',
        'type': 'r3.large',
        'image_id': 'ami-ed82e39e',
        'username': 'ubuntu',
        #'key_pair': ('AWS-EU', 'eu-key.pem'),
        'disk_size': 20,
        'disk_delete_on_termination': True,
        'scripts': [],
        'firewall': [ ('tcp', 22, 22, '0.0.0.0/0') ]
    }
}

You can run the script again and, as long as you use the same name, it will resume.
$ ./launch-spot.py -n test -p 15G
Creating key pair... already exists
Creating security group... already exists
Reusing existing spot request
Waiting for launch
Retrieving instance by id
Got instance: i-15fc09d5 [running]
Waiting for instance to boot

To run a script on this instance via SSH, use one or more -s arguments.
For example, if you have test.sh
#!/bin/bash
touch /tmp/i-was-here
ls -l /tmp/i-was-here

this will be the output of the script
$ ./launch-spot.py -n test -p 15G -s test.sh
Creating key pair... already exists
Creating security group... already exists
Reusing existing spot request
Waiting for launch
Retrieving instance by id
Got instance: i-15fc09d5 [running]
Waiting for instance to boot
Connecting to SSH [54.74.149.116] .
Running script: test.sh
-rw-rw-r-- 1 ubuntu ubuntu 0 Oct 10 11:53 /tmp/i-was-here
Exit code: 0

If you want to connect to this instance manually, you can use the -i flag:
$ ./launch-spot.py -n test -p 15G -i
Creating key pair... already exists
Creating security group... already exists
Reusing existing spot request
Waiting for launch
Retrieving instance by id
Got instance: i-15fc09d5 [running]
Waiting for instance to boot
ssh ubuntu@54.74.149.116 -i KP-test.pem -oStrictHostKeyChecking=no

You can copy & paste the last line into your terminal and connect to the instance.
As you can see, if you haven't specified key_pair in the profile, a new key pair will be created with the name KP-test and it will be saved as KP-test.pem .
If there's no security_group in the profile, a new one will be created with the name SG-test and the rules from the firewall profile setting will be applied. At a minimum, SSH from your IP should be allowed.
If you want to stop the spot instance, simply run sudo halt on the instance. The instance, the instance request and disks (if auto terminate is on) will automatically be shut down and terminated.
Scripting
If you use a simple bash script, I suggest using one like this. set -e will stop the execution if an error occurs. If the script finished successfully, it will not be run again.
#!/bin/bash
set -e
if [ ! -f /var/setup.done ]; then
    ...
    sudo touch /var/setup.done
fi

If you share a provisioning script with Vagrant and AWS, you can use these lines to check if you are running on AWS EC2 or locally on Vagrant.
export AWS=`curl -s -m 1 http://169.254.169.254/latest/meta-data/instance-id 2> /dev/null`
export VAGRANT=`ls /vagrant 2> /dev/null | head -n 1`

Amazon Ubuntu mirrors and apt-get can be slow and flaky, especially if you hit CTRL+C while the script is executing apt-get and then run it again.
# this may or may not prevent "hash sum mismatch" errors
sudo rm -rf /var/lib/apt/lists/*
sudo apt-get clean
# amazon mirrors are very slow
sudo sed -i 's/us-east-1.ec2.archive.ubuntu.com/us.archive.ubuntu.com/g' /etc/apt/sources.list
sudo sed -i 's/eu-west-1.ec2.archive.ubuntu.com/ie.archive.ubuntu.com/g' /etc/apt/sources.list
# stay up to date
sudo apt-get update
# in case previous apt-get was interrupted
sudo dpkg --configure -a

Create a file system on the instance disk and move /home, /tmp and /swapfile to it. This can be useful on large SSD backed instance disks.
# Format the instance-store disk and relocate /home/ubuntu, /tmp and the swap file onto it.
# Runs only on EC2 (metadata service answers) and only while /mnt/instance does not exist yet.
export AWS=`curl -s -m 1 http://169.254.169.254/latest/meta-data/instance-id 2> /dev/null`
export AWS_INSTANCE_STORE=1
# numeric comparison needs -eq; "-e" is not a binary test operator, so using it here
# makes the test fail with an error and the block below would never run
if [ "$AWS" ] && [ "$AWS_INSTANCE_STORE" -eq "1" ] && [ ! -d /mnt/instance ]; then
    # aws instance store
    sudo mkfs -t ext4 /dev/xvdb
    sudo mkdir -p /mnt/instance
    sudo mount /dev/xvdb /mnt/instance
    sudo chown -R $USER:$USER /mnt/instance
    # don't use root disk
    [ ! -d /mnt/instance/home ] && sudo mv /home/ubuntu /mnt/instance/home && sudo ln -s /mnt/instance/home /home/ubuntu
    [ ! -d /mnt/instance/tmp ] && sudo mv /tmp/ /mnt/instance/ && sudo ln -s /mnt/instance/tmp /tmp
    [ ! -f /mnt/instance/swapfile ] && sudo touch /mnt/instance/swapfile && sudo ln -s /mnt/instance/swapfile /swapfile
fi

Add swap.
export SWAP="4G"
if [ "$SWAP" ]; then
    sudo fallocate -l $SWAP /swapfile && sudo mkswap /swapfile && sudo swapon /swapfile
    sudo sysctl vm.swappiness=1 && sudo sysctl vm.vfs_cache_pressure=50
fi

Keep /tmp in RAM.
export TMPFS=0
export TMPFS_SWAP="5G"
if [ "$TMPFS" ] && [ "$TMPFS" -eq "1" ]; then
    sudo mount -o defaults,noatime,nosuid,nodev,noexec,mode=1777,size=500G -t tmpfs tmpfs /tmp
    [ "$TMPFS_SWAP" ] && sudo fallocate -l $TMPFS_SWAP /swapfile.tmpfs && sudo mkswap /swapfile.tmpfs && sudo swapon /swapfile.tmpfs
fi

To upload/download files to S3 on AWS and use local files in /vagrant when running in Vagrant, you could do something like this.
export AWS=`curl -s -m 1 http://169.254.169.254/latest/meta-data/instance-id 2> /dev/null`
export AWS_ACCESS_KEY_ID="XXXXXXXXXXXXXXXXXXXX"
export AWS_SECRET_ACCESS_KEY="XXXXXXXXXXXXXXXXXXXXXXXXXX"
export VAGRANT=`ls /vagrant 2> /dev/null | head -n 1`
sudo apt-get install -y python-pip
sudo pip install awscli
[ "$AWS_ACCESS_KEY_ID" ] && [ "$AWS_SECRET_ACCESS_KEY" ] && echo -e "$AWS_ACCESS_KEY_ID\n$AWS_SECRET_ACCESS_KEY\n\n" | aws configure
if [ "$VAGRANT" ]; then
    cp /vagrant/file.tar.gz file.tar.gz
elif [ "$AWS" ]; then
    aws s3 cp s3://bucket/file.tar.gz file.tar.gz
fi
...
if [ "$VAGRANT" ]; then
    cp result.tar.gz /vagrant/result.tar.gz
elif [ "$AWS" ]; then
    aws s3 cp result.tar.gz s3://bucket/result.tar.gz --storage-class REDUCED_REDUNDANCY
fi