#!/bin/bash
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -eux -o pipefail

# num-workers, {sandbox,worker}-img are set at VM creation time in the Makefile.

ATTRS='http://metadata.google.internal/computeMetadata/v1/instance/attributes'

# Number of worker containers to start. Falls back to 1 if the attribute
# cannot be fetched.
URL="$ATTRS/num-workers"
NUM_WORKERS=$(curl --silent --fail -H'Metadata-Flavor:Google' "$URL" || echo 1)

# The image names have no fallback: a failed fetch aborts the script (set -e).
URL="$ATTRS/sandbox-img"
SANDBOX_IMG=$(curl --silent --fail -H'Metadata-Flavor:Google' "$URL")

URL="$ATTRS/worker-img"
WORKER_IMG=$(curl --silent --fail -H'Metadata-Flavor:Google' "$URL")

# Format and enable each matching NVMe device as swap.
for SSD in /dev/nvme0n*; do
  # When the glob matches nothing, bash leaves the literal pattern in place.
  # Abort with an explicit message instead of handing the pattern to mkswap.
  if [[ ! -e "$SSD" ]]; then
    echo "No device matches /dev/nvme0n*, cannot set up swap" >&2
    exit 1
  fi
  mkswap "$SSD"
  swapon -p -1 "$SSD"
done

# This is used by the sandbox containers, NOT needed by the workers.
# Rationale for size=500G: by default tmpfs mounts are sized to RAM/2, which
# makes the CI depend too much on the underlying VM. Here and below, we pick an
# arbitrary fixed size (we use local scratch NVME as a swap device).
export SHARED_WORKER_CACHE=/mnt/disks/shared_worker_cache
rm -rf -- "$SHARED_WORKER_CACHE"
mkdir -p "$SHARED_WORKER_CACHE"
mount -t tmpfs tmpfs "$SHARED_WORKER_CACHE" -o mode=777,size=500G

# This is used to queue build artifacts that are uploaded to GCS.
export ARTIFACTS_DIR=/mnt/disks/artifacts
rm -rf -- "$ARTIFACTS_DIR"
mkdir -p "$ARTIFACTS_DIR"
mount -t tmpfs tmpfs "$ARTIFACTS_DIR" -o mode=777,size=500G

# Pull the latest images from the registry.
docker pull "$WORKER_IMG"
docker pull "$SANDBOX_IMG"

# Create the restricted bridge for the sandbox container.
# Prevent access to the metadata server and impersonation of service accounts.
docker network rm sandbox 2>/dev/null || true  # Handles the reboot case.
docker network create sandbox -o com.docker.network.bridge.name=sandbox
sudo iptables -I DOCKER-USER -i sandbox -d 169.254.0.0/16 -j REJECT

# These args will be appended to the docker run invocation for the sandbox.
export SANDBOX_NETWORK_ARGS="--network sandbox --dns 8.8.8.8"

# The worker_main_loop.py script creates one docker sandbox container for
# each job invocation. It needs to talk back to the host docker to do so.
# This implies that the worker container is trusted and should never run code
# from the repo, as opposed to the sandbox container, which is isolated.
for i in $(seq "$NUM_WORKERS"); do

  # We mount the tmpfs ourselves because Docker doesn't allow to both
  # override tmpfs-size AND "-o exec" (see also
  # https://github.com/moby/moby/issues/32131)
  SANDBOX_TMP=/mnt/disks/sandbox-$i-tmp
  rm -rf -- "$SANDBOX_TMP"
  mkdir -p "$SANDBOX_TMP"
  mount -t tmpfs tmpfs "$SANDBOX_TMP" -o mode=777,size=100G

  # Remove any container left over under the same name before starting a new
  # one; a missing container is not an error.
  docker rm -f "worker-$i" 2>/dev/null || true
  docker run -d \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v "$ARTIFACTS_DIR:$ARTIFACTS_DIR" \
    --env SHARED_WORKER_CACHE="$SHARED_WORKER_CACHE" \
    --env SANDBOX_NETWORK_ARGS="$SANDBOX_NETWORK_ARGS" \
    --env ARTIFACTS_DIR="$ARTIFACTS_DIR" \
    --env SANDBOX_TMP="$SANDBOX_TMP" \
    --env WORKER_HOST="$(hostname)" \
    --name "worker-$i" \
    --hostname "worker-$i" \
    --log-driver gcplogs \
    "$WORKER_IMG"
done


# Register a systemd service to stop worker containers gracefully on shutdown.
# Helper run at shutdown: stops every running container whose name matches
# worker-<N>, giving each up to 120 seconds to exit.
# The here-doc delimiter is quoted so the body (which contains `\d+$`) is
# written verbatim, with no shell expansion by this script.
# xargs -r keeps `docker stop` from being invoked with no container IDs.
cat > /etc/systemd/system/graceful_shutdown.sh <<'EOF'
#!/bin/sh
logger 'Shutting down worker containers'
docker ps -q -f 'name=worker-\d+$' | xargs -r docker stop -t 120
exit 0
EOF

chmod 755 /etc/systemd/system/graceful_shutdown.sh

# This service will cause the graceful_shutdown.sh to be invoked before stopping
# docker, hence before tearing down any other container.
# The unit defines only ExecStop: starting it below just marks it active
# (RemainAfterExit=yes), and the helper runs when the unit is stopped.
cat > /etc/systemd/system/graceful_shutdown.service <<'EOF'
[Unit]
Description=Worker container lifecycle
Wants=gcr-online.target docker.service
After=gcr-online.target docker.service
Requires=docker.service

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStop=/etc/systemd/system/graceful_shutdown.sh
EOF

systemctl daemon-reload
systemctl start graceful_shutdown.service