#!/bin/bash
#
# Driver for the leap platform deploy tests (see usage() below).
#
# Configuration comes from the file given with -c/--config plus
# /etc/leap/platform-test-common.cfg; both are sourced and are expected to
# provide the variables referenced throughout this script (LEAP_CMD, LEAP_SRC,
# PROVIDERDIR, PLATFORMDIR, PLATFORM_BRANCH, LOGDIR, NODES, MAIL_TO, ...).
#
# NOTE: $LEAP_CMD and $OPTS are expanded unquoted on purpose so that they may
# carry several words (command plus options).

# in case the script gets canceled, use a trap to finally remove the
# lockfile that indicates a running process
trap "cleanup" INT TERM

# Print the help text to stdout.
usage() {
cat << EOF
usage: $0 [options] command [arguments...]

This script runs the leap platform deploy tests

OPTIONS
  -a|--all          run command on all nodes
  -c|--config file  specify config file
  -h|--help         show help
  -l|--lock         refuse to deploy if lockfile from previous failures exists
  -V|--versions     show versions/git revision of leap_cli and leap_platoform in provider dir

COMMANDS
  bootstrap         bootstrap node(s):
                      - leap local start
                      - leap node int
                      - sets up hostname and runs apt-get dist-upgrade
                      - leap local save
  create_provider   creates a provider instance
  deploy            deploy node(s)
  init_deploy       initialize node, then do a deploy
  destroy_deploy    destroy vms, init, and deploy
  reset_deploy      reset and deploy node(s)
  test              run leap test
EOF
}

# Register local nodes with "leap node add".
# Arguments: one "name:services" spec per node.
# Each node gets ${IP_PREFIX}.<n>, counting up from IP_SUFFIX_START + 1.
add_nodes() {
  local suffix=$IP_SUFFIX_START
  local spec node services ip
  local -a extra_config

  for spec in "$@"; do
    node=${spec%:*}
    services=${spec#*:}
    suffix=$((suffix + 1))
    ip="${IP_PREFIX}.${suffix}"

    case $services in
      openvpn)
        extra_config=(
          "openvpn.gateway_address:${IP_PREFIX}.98"
          "openvpn.second_gateway_address:${IP_PREFIX}.99"
        )
        ;;
      *)
        extra_config=()
        ;;
    esac

    $LEAP_CMD node add --local "$node" ip_address:"$ip" "${extra_config[@]}" services:"$services"
  done
}

# Destroy the given local VMs.
# Arguments: VM names.
destroy_vms() {
  local vm
  for vm in "$@"; do
    $LEAP_CMD local destroy "$vm"
  done
}

# Start, init, partially deploy, reboot and save each given VM.
# Arguments: VM names.
bootstrap_nodes() {
  local vm
  for vm in "$@"; do
    $LEAP_CMD $OPTS local start "$vm"
    wait_for_node "$vm"
    $LEAP_CMD $OPTS node init "$vm"

    # set hostname + do dist-upgrade
    $LEAP_CMD $OPTS deploy "$vm" --tags site_apt::dist_upgrade,site_config::hosts,site_squid_deb_proxy::client

    # make sure machines are rebooted in order to be able to load kernel
    # modules after a kernel update
    # https://leap.se/code/issues/6494
    # Run in a subshell so the directory change does not leak to the caller
    # (or to the next loop iteration).
    ( cd "${PROVIDERDIR}/test" && vagrant reload "$vm" )

    $LEAP_CMD $OPTS local save "$vm"
  done
}

# Remove the "running instance" lockfile and exit.
# Also installed as INT/TERM handler, so it may run before LOCKFILE is set.
# The exit status is that of the lockfile test/removal, as before.
cleanup () {
  echo "cleaning up..."
  [[ -n "$LOCKFILE" && -e "$LOCKFILE" ]] && rm -- "$LOCKFILE"
  exit
}

# Exit with status 1 if another instance holds the lockfile, or if --lock was
# given and a failure lockfile from a previous run exists (optionally mailing
# a warning to MAIL_TO in the latter case).
check_for_running_instances() {
  local subj msg

  if [[ -f $LOCKFILE ]]; then
    echo "Lockfile found at $LOCKFILE - maybe other process(es) found running for $(basename "$0") - exiting. Please investigate and then remove lockfile."
    exit 1
  fi

  if [[ $lock && -f $FAILURE_LOCKFILE ]]; then
    subj="WARNING: CI failure lockfile found for branch ${PLATFORM_BRANCH} - previous deploy tests failed !"
    msg="CI lock found, and --lock in use. This means that leap test failed on the previous run.\n Please investigate and then remove $FAILURE_LOCKFILE\n\n"

    if [[ "$MAIL_TO" != '' ]]; then
      echo "$subj Sending mail to $MAIL_TO:"
      sendemail -f "$MAIL_FROM" -t "$MAIL_TO" -o tls=no -m "${msg}" -u "${subj}"
    fi
    exit 1
  fi
}

# Clone the platform, create a new provider in PROVIDERDIR, set up user and
# certificates, add the nodes from NODES and commit the result to a fresh
# git repository inside the provider directory.
create_provider() {
  if [[ -e "$PROVIDERDIR" ]]; then
    echo "$PROVIDERDIR" exists - exiting
    # we already hold the run lock at this point; do not leave it behind
    rm -f -- "$LOCKFILE"
    exit 1
  fi

  git clone -b "$PLATFORM_BRANCH" --recursive https://leap.se/git/leap_platform.git "$PLATFORMDIR"

  mkdir -p "$PROVIDERDIR"
  # everything below (leap new, git init/add/commit) acts on the current
  # directory, so bail out if we cannot enter the provider dir
  if ! cd "$PROVIDERDIR"; then
    echo "cannot change to $PROVIDERDIR - exiting" >&2
    rm -f -- "$LOCKFILE"
    exit 1
  fi

  $LEAP_CMD $OPTS new --contacts "$CONTACTS" --domain "$DOMAIN" --name "$PROVIDER" --platform="$PLATFORMDIR" .

  # for now, we use the vagrant pubkey until https://leap.se/code/issues/2039 is solved
  $LEAP_CMD $OPTS add-user --self --ssh-pub-key="$SSHKEY"

  $LEAP_CMD $OPTS cert ca
  $LEAP_CMD $OPTS cert csr

  # copy for faster testing
  #cp $ROOTDIR/dh.pem.test $PROVIDERDIR/files/ca/dh.pem
  $LEAP_CMD $OPTS cert dh

  # NODES is a whitespace-separated list of name:services specs; split it
  # shellcheck disable=SC2086
  add_nodes $NODES

  git init
  git add .
  git commit -m"finished create_provider"
}

# Deploy each given VM with verbose level 2, log the full output and collect
# every line not matched by the noise filters in an error log. A non-empty
# error log counts as a failed deploy: the failure lockfile is touched and,
# if MAIL_TO is set, a host-specific mail is sent.
# Arguments: VM names.
deploy() {
  # we need to deploy with verbose level 2, and filter out unwanted stuff
  # until puppet errors show up in verbose level 0 +1 (#1750)
  local FILTER_CLI='= read|= loading|= no change| - executing| = executing| = applying| = ran git| = checking| = synching| = skipping file_path| - cd .*; rsync -| - hiera| = created hiera/| = updated hiera/| = updated secrets.json| - cd /root/| - rolling backexecuting| - files/|\[bin,tests,puppet\] ->|] Hostname updated.| = Updating submodule puppet/modules|Warning: Permanently added.*to the list of known hosts.| = leap command v| = leap platform v| - \[.*\] ok| - \[.*\] STARTING APPLY| - \[.*\] APPLY COMPLETE'
  local FILTER_PUPPET="] notice: |] No change to hostname|] Puppet apply complete \(changes made\).|] warning: Dynamic lookup|] warning: Scope\(Class|Skipping because of failed dependencies|warning: You cannot collect without storeconfigs being set|warning: default \`to_a' will be obsolete"
  local FILTER_ALL

  # FILTER_COMMON is optional and comes from the sourced configuration
  if [[ -n "$FILTER_COMMON" ]]; then
    FILTER_ALL="($FILTER_CLI|$FILTER_PUPPET|$FILTER_COMMON)"
  else
    FILTER_ALL="($FILTER_CLI|$FILTER_PUPPET)"
  fi

  local vm date LOG1 LOG2 ERRLOG1 ERRLOG2 version_info subj msg
  for vm in "$@"; do
    [[ -e "$LOGDIR" ]] || mkdir -p "$LOGDIR"
    date=$( date +"%F-%H%M%S" )
    LOG1="$LOGDIR/deploy-$vm.log"
    LOG2="$LOGDIR/deploy-$vm-$date.log"
    ERRLOG1="$LOGDIR/deploy-$vm-$date-error.log"
    ERRLOG2="$LOGDIR/deploy-error.log"

    echo "Deploying \"$vm\" on $( date )" | tee -a "$LOG1" "$LOG2"

    $LEAP_CMD $OPTS -v 2 deploy "$vm" 2>&1 \
      | ts \
      | tee -a "$LOG1" "$LOG2" \
      | grep -E -v "$FILTER_ALL" \
      | tee -a "$ERRLOG1" "$ERRLOG2" > /dev/null

    # send an host-specific error mail on deploy failures
    if [[ -s "$ERRLOG1" ]]; then
      touch "$FAILURE_LOCKFILE"
      version_info=$( versions )
      subj="WARNING - \"leap deploy\" of platform $PLATFORM_BRANCH branch on \"$vm\" had errors !"
      # log the warning itself; the mail body is only assembled below
      echo "$( date ): $subj" | tee -a "$LOG1" "$LOG2" "$ERRLOG2"
      echo

      msg="Output of error log below:\n\n$( cat "$ERRLOG1" ) \n\n"
      msg="${msg}-------------------------------------------------------------------\n\n"
      msg="${msg}error log: ${ERRLOG1}\n"
      msg="${msg}comlete log: ${LOG2}\n\n"
      msg="${msg}Tested on $( date ) on \"$vm\" with following versions/git commit IDs: \n\n$version_info"

      cat "$ERRLOG1"

      if [[ "$MAIL_TO" != '' ]]; then
        echo "Sending this mail to $MAIL_TO:"
        sendemail -f "$MAIL_FROM" -t "$MAIL_TO" -o tls=no -m "${msg}" -u "${subj}"
      fi
    else
      echo "Deploy to $vm on $( date ) went fine." | tee -a "$LOG1" "$LOG2"
      rm "$ERRLOG1"
    fi
  done
}

# Print the ip_address value from the node's JSON file in the provider dir.
# Arguments: $1 - node name
get_ip () {
  grep ip_address "$PROVIDERDIR/nodes/$1.json" | cut -f 2 -d: | sed 's/[ ",]//g'
}

# Succeed if the given host answers a single ping within 10 seconds.
# Arguments: $1 - host or IP
ip_pingable () {
  ping -q -W10 -c1 "$1" >/dev/null 2>&1
}

# Print a start banner with the script name and the current date.
log_start() {
  echo
  echo "Starting $0 on $( date )"
}

# Despite its name this sends the deploy *success* mail.
# Reads $nodes from the calling function's scope.
deploy_failure_email() {
  # only send out a mail on success, because there are mails send out
  # for every hosts that has deploy errors anyway
  local version_info subj msg
  if [[ ! -e "$FAILURE_LOCKFILE" && -n "$MAIL_TO" ]]; then
    version_info=$( versions )
    subj="OK - \"leap deploy\" of platform $PLATFORM_BRANCH branch went fine."
    msg="Tested on $( date ) on these nodes: \"$nodes\"\nwith following versions/git commit IDs: \n\n$version_info"
    echo "Sending deploy success mail to $MAIL_TO"
    sendemail -f "$MAIL_FROM" -t "$MAIL_TO" -o tls=no -m "${msg}" -u "${subj}"
  fi
}

# Report the result of the command that ran immediately before this function
# was called (callers invoke it directly after run_tests) and mail it,
# attaching TEST_LOG2. Reads $nodes from the calling function's scope.
test_failure_email() {
  # must stay the very first statement: it captures the caller's last status
  test_failure=$?

  local version_info subj msg
  version_info=$( versions )
  msg="Tested on $( date ) on these nodes: \"$nodes\"\nwith following versions/git commit IDs: \n\n$version_info"

  if [[ $test_failure -eq 0 ]]; then
    subj="OK - \"leap test\" of platform $PLATFORM_BRANCH branch is all green."
  else
    subj="WARNING - \"leap test\" of platform $PLATFORM_BRANCH branch failed !!"
  fi
  echo "$subj"

  # like the other mail senders, only send when a recipient is configured
  if [[ "$MAIL_TO" != '' ]]; then
    echo "Sending test mail to $MAIL_TO"
    # unfortunatly, no tls atm, fixed in sendemail 1.56-3
    # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=679911
    sendemail -f "$MAIL_FROM" -t "$MAIL_TO" -o tls=no -m "${msg}" -u "${subj}" -a "$TEST_LOG2"
  fi
}

# init_deploy tests that a re-init of a node followed by a deploy works; it
# accepts that the limited tagged deploy done in bootstrap_nodes() is ok.
# Steps:
#  1. make sure the platform and leap cli are up-to-date
#  2. reset the nodes to their saved state (saved after bootstrap_nodes ran)
#  3. run bootstrap_nodes (start vm, init, limited tagged deploy, reload, save)
#  4. deploy the nodes (in the background, one job per node)
#  5. run tests
#  6. send email
# Arguments: node names.
init_deploy() {
  local nodes="$*"
  local i

  update_platform
  update_leap_cli

  cd "$PROVIDERDIR"
  log_start
  echo "Starting init_deploy for nodes $* as background tasks on $( date )"

  for i in $nodes; do
    $LEAP_CMD $OPTS local reset "$i"
    wait_for_node "$i"
    bootstrap_nodes "$i"

    # run cutom provisioning script, i.e. specified in ~/.leaprc
    ( cd test && vagrant provision "$i" )

    deploy "$i" &
  done

  # needed in a detached screen session, otherwise it would terminate before
  # deploy jobs have finished
  echo "Waiting until last deploy process has finished..."
  wait

  # mails the deploy result (see deploy_failure_email)
  deploy_failure_email

  # run tests, then mail their result; keep these two adjacent because
  # test_failure_email reads the exit status of run_tests
  run_tests
  test_failure_email
}

# reset_deploy tests that a deploy works after the node has been reset to the
# bootstrap_nodes() state. It does not re-run bootstrap_nodes(), so
# 'leap node init' is not run again; this is usually sufficient.
# Steps:
#  1. make sure the platform and leap cli are up-to-date
#  2. reset the nodes to their saved state (saved after bootstrap_nodes ran)
#  3. deploy the nodes (in the background, one job per node)
#  4. run tests
#  5. send email
# Arguments: node names.
reset_deploy() {
  local nodes="$*"
  local i

  update_platform
  update_leap_cli

  cd "$PROVIDERDIR"
  log_start
  echo "Starting reset_deploy for nodes $* as background tasks on $( date )"

  for i in $nodes; do
    $LEAP_CMD $OPTS local reset "$i"
    wait_for_node "$i"

    # run cutom provisioning script, i.e. specified in ~/.leaprc
    ( cd test && vagrant provision "$i" )

    deploy "$i" &
  done

  # needed in a detached screen session, otherwise it would terminate before
  # deploy jobs have finished
  echo "Waiting until last deploy process has finished..."
  wait

  # mails the deploy result (see deploy_failure_email)
  deploy_failure_email

  # run tests, then mail their result; keep these two adjacent because
  # test_failure_email reads the exit status of run_tests
  run_tests
  test_failure_email
}

# destroy_deploy tests a full cycle: it destroys the VMs, re-creates them from
# scratch and bootstraps them.
# Steps:
#  1. make sure the platform and leap cli are up-to-date
#  2. destroy the vms
#  3. run bootstrap_nodes (start vm, init, limited tagged deploy, reload, save)
#  4. deploy the nodes (in the background, one job per node)
#  5. run tests
#  6. send email
# Arguments: node names.
destroy_deploy() {
  local nodes="$*"
  local i

  update_platform
  update_leap_cli

  cd "$PROVIDERDIR"
  log_start
  echo "Starting destroy_deploy for nodes $* as background tasks on $( date )"

  # shellcheck disable=SC2086
  destroy_vms $nodes

  for i in $nodes; do
    bootstrap_nodes "$i"

    # run cutom provisioning script, i.e. specified in ~/.leaprc
    ( cd test && vagrant provision "$i" )

    deploy "$i" &
  done

  # needed in a detached screen session, otherwise it would terminate before
  # deploy jobs have finished
  echo "Waiting until last deploy process has finished..."
  wait

  # mails the deploy result (see deploy_failure_email)
  deploy_failure_email

  # run tests, then mail their result; keep these two adjacent because
  # test_failure_email reads the exit status of run_tests
  run_tests
  test_failure_email
}

# Succeed if a TCP connection to port 22 of the given host can be opened.
# Arguments: $1 - host or IP
ssh_up () {
  nc -w 4 "$1" 22 > /dev/null
}

# Run "leap test --continue", append the filtered output to both test logs
# and touch the failure lockfile when leap reports a failure.
# Sets the global test_failure and returns the exit status of the leap command.
run_tests () {
  local date
  date=$( date +"%F-%H%M%S" )
  local TEST_FILTER='net.ssh.authentication.agent.*could not connect to ssh-agent'

  printf '\nRunning leap test on %s\n' "$date" | tee -a "$TEST_LOG1" "$TEST_LOG2"

  $LEAP_CMD $OPTS test --continue 2>&1 \
    | ts \
    | grep -E -v "$TEST_FILTER" \
    | tee -a "$TEST_LOG1" "$TEST_LOG2"
  # status of the leap command itself, not of the filters behind it
  test_failure=${PIPESTATUS[0]}

  if [[ "$test_failure" -ne 0 ]]; then
    echo 'WARNING - "leap test" failed !' | tee -a "$TEST_LOG1" "$TEST_LOG2"
    touch "$FAILURE_LOCKFILE"
  else
    echo 'OK - "leap test" is all green !' | tee -a "$TEST_LOG1" "$TEST_LOG2"
  fi

  return "$test_failure"
}

# Pull the leap_cli sources in LEAP_SRC and run "sudo bundle" there.
# Returns 1 without doing anything if LEAP_SRC cannot be entered.
update_leap_cli () {
  cd "$LEAP_SRC" || return 1
  git pull
  sudo bundle
}

# Force the platform checkout in PLATFORMDIR to origin/PLATFORM_BRANCH.
# Returns 1 without touching anything if PLATFORMDIR cannot be entered, so the
# destructive git commands below never run in some other directory.
update_platform () {
  cd "$PLATFORMDIR" || return 1

  # works also with forces updates, i.e. reabased branches like citest
  # http://stackoverflow.com/questions/4550937/how-to-force-update-when-doing-git-pull/14359894#14359894

  # throw away local uncommitted changes
  git reset --hard HEAD
  # remove untracked files
  git clean -f

  git fetch
  git checkout "origin/$PLATFORM_BRANCH"
  git checkout -B "$PLATFORM_BRANCH"
  git submodule sync
  git submodule update --init
}

# Print the provider's git HEAD (or a note that it is not under version
# control) followed by the leap command and leap platform version lines taken
# from "leap -v 2 list". Changes directory, so callers use $( versions ).
versions () {
  local provider_head=""

  cd "$PROVIDERDIR"
  [[ -d .git ]] && provider_head=$( git rev-parse HEAD )
  # only fall back to the placeholder when no revision could be determined
  if [[ -z "$provider_head" ]]; then
    provider_head='not under version control'
  fi
  echo "Provider ($PROVIDERDIR): $provider_head"

  echo
  $LEAP_CMD -v 2 list | grep ' = leap command v'
  echo
  echo
  #echo "leap_platform:"
  $LEAP_CMD -v 2 list | grep ' = leap platform v'
  echo
  echo
}

# Block until port 22 of the given VM accepts connections, polling once per
# second. There is no timeout.
# Arguments: $1 - VM name (its IP is looked up with get_ip)
wait_for_node() {
  local vm=$1
  local ip
  ip=$( get_ip "$vm" )
  local online=0

  echo "Waiting for ssh on VM $vm (IP: $ip) to come up..."
  while [[ $online -eq 0 ]]; do
    ssh_up "$ip" && online=1
    sleep 1
  done
}

# ---------------------------------------------------------------------------
# main
# ---------------------------------------------------------------------------

# http://docs.vagrantup.com/v2/providers/default.html
export VAGRANT_DEFAULT_PROVIDER="libvirt"

config=""
all=false
print_versions=false

# default in lib/leap_cli/leapfile.rb
IP_PREFIX='10.5.5'

# NOTE(review): -v/--verbose is accepted by getopt here but has no handler in
# the case statement below, so it ends up in the "unrecognized option" branch.
if ! options=$(getopt -o avVlc:h -l all,lock,verbose,versions,config:,help -- "$@")
then
  # something went wrong, getopt will put out an error message for us
  usage
  exit 1
fi

# getopt emits a shell-quoted argument list; eval re-parses it into "$@"
eval set -- "$options"

while [[ $# -gt 0 ]]
do
  case $1 in
    -a|--all) all=true;;
    -c|--config) config=$2; shift ;;
    -h|--help) usage; exit 1;;
    -l|--lock) lock=true;;
    -V|--versions) print_versions=true;;
    (--) shift; break;;
    (-*) echo "$0: error - unrecognized option $1" 1>&2; exit 1;;
    (*) break;;
  esac
  shift
done

cmd=$1
shift
nodelist="$*"

#echo "config: $config"
#echo "cmd: $cmd"
#echo "nodelist: $nodelist"
#echo "all: $all"

if [[ -z "$config" ]]
then
  usage
  echo "Please provide a config file"
  exit 1
fi

# provider specific config
source "$config"

# common config for all providers
source /etc/leap/platform-test-common.cfg

date=$( date +"%F-%H%M%S" )
TEST_LOG1="$LOGDIR/test.log"
TEST_LOG2="$LOGDIR/test-$date.log"

LOCKFILE_DIR="/var/run/lock/leap_ci/${PLATFORM_BRANCH}"
[[ -e "$LOCKFILE_DIR" ]] || mkdir -p "$LOCKFILE_DIR"
LOCKFILE="${LOCKFILE_DIR}/$(basename "$0").lock"
FAILURE_LOCKFILE="${LOCKFILE_DIR}/failure.lock"

check_for_running_instances || exit $?

# set branch specifc lockfile so a deploy test cannot be run twice in parallel
touch "$LOCKFILE"

# From here on every exit path has to release $LOCKFILE again, otherwise the
# next run is refused by check_for_running_instances.

[[ -e "$PROVIDERDIR" ]] && cd "$PROVIDERDIR"

if $print_versions
then
  versions=$( versions )
  echo "$versions"
  echo
  rm -f -- "$LOCKFILE"
  exit 0
fi

if [[ -z "$LEAP_CMD" || -z "$LEAP_SRC" ]]
then
  echo "please provide a path to the leap_cli binary and the source in the config file, using the LEAP_CMD and LEAP_SRC var."
  rm -f -- "$LOCKFILE"
  exit 1
fi

if $all ; then
  # use NODES variable from the config file and
  # strip services from nodes_services
  # i.e. nodes_services='redevcouchdb1:couchdb redevcouchdb2:couchdb'
  # -> nodes='redevcouchdb1 redevcouchdb2'
  nodes=$( echo "$NODES" | sed 's/:[[:alnum:],]*//g' )
else
  # use nodelist provided via cmdline parameters
  nodes=$nodelist
fi

# Hand every node over as its own argument: the functions below iterate over
# "$@", so a single "a b c" string would be treated as one (bogus) node name.
# An empty node list results in no arguments at all.
# shellcheck disable=SC2206
node_args=( $nodes )

case $cmd in
  add_nodes) add_nodes "${node_args[@]}";;
  bootstrap) bootstrap_nodes "${node_args[@]}";;
  create_provider) create_provider;;
  deploy) deploy "${node_args[@]}";;
  destroy_deploy) destroy_deploy "${node_args[@]}";;
  init_deploy) init_deploy "${node_args[@]}";;
  reset_deploy) reset_deploy "${node_args[@]}";;
  test) run_tests "${node_args[@]}";;
  (*)
    usage
    echo "Please specify a command."
    rm -f -- "$LOCKFILE"
    exit 1
    ;;
esac

cleanup