xref: /aosp_15_r20/external/perfetto/infra/ci/worker/gce-startup-script.sh (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1#!/bin/bash
2# Copyright (C) 2019 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16set -eux -o pipefail
17
18# num-workers, {sandbox,worker}-img are set at VM creation time in the Makefile.
19
20ATTRS='http://metadata.google.internal/computeMetadata/v1/instance/attributes'
21URL="$ATTRS/num-workers"
22NUM_WORKERS=$(curl --silent --fail -H'Metadata-Flavor:Google' $URL || echo 1)
23
24URL="$ATTRS/sandbox-img"
25SANDBOX_IMG=$(curl --silent --fail -H'Metadata-Flavor:Google' $URL)
26
27URL="$ATTRS/worker-img"
28WORKER_IMG=$(curl --silent --fail -H'Metadata-Flavor:Google' $URL)
29
30for SSD in /dev/nvme0n*; do
31mkswap $SSD
32swapon -p -1 $SSD
33done
34
35# This is used by the sandbox containers, NOT needed by the workers.
36# Rationale for size=500G: by default tmpfs mount are set to RAM/2, which makes
37# the CI depend too much on the underlying VM. Here and below, we pick an
38# arbitrary fixed size (we use local scratch NVME as a swap device).
39export SHARED_WORKER_CACHE=/mnt/disks/shared_worker_cache
40rm -rf $SHARED_WORKER_CACHE
41mkdir -p $SHARED_WORKER_CACHE
42mount -t tmpfs tmpfs $SHARED_WORKER_CACHE -o mode=777,size=500G
43
44# This is used to queue build artifacts that are uploaded to GCS.
45export ARTIFACTS_DIR=/mnt/disks/artifacts
46rm -rf $ARTIFACTS_DIR
47mkdir -p $ARTIFACTS_DIR
48mount -t tmpfs tmpfs $ARTIFACTS_DIR -o mode=777,size=500G
49
50# Pull the latest images from the registry.
51docker pull $WORKER_IMG
52docker pull $SANDBOX_IMG
53
54# Create the restricted bridge for the sandbox container.
55# Prevent access to the metadata server and impersonation of service accounts.
56docker network rm sandbox 2>/dev/null || true  # Handles the reboot case.
57docker network create sandbox -o com.docker.network.bridge.name=sandbox
58sudo iptables -I DOCKER-USER -i sandbox -d 169.254.0.0/16 -j REJECT
59
60# These args will be appended to the docker run invocation for the sandbox.
61export SANDBOX_NETWORK_ARGS="--network sandbox --dns 8.8.8.8"
62
63# The worker_main_loop.py script creates one docker sandbox container for
64# each job invocation. It needs to talk back to the host docker to do so.
65# This implies that the worker container is trusted and should never run code
66# from the repo, as opposite to the sandbox container that is isolated.
67for i in $(seq $NUM_WORKERS); do
68
69# We manually mount a tmpfs mount ourselves because Docker doesn't allow to
70# both override tmpfs-size AND "-o exec" (see also
71# https://github.com/moby/moby/issues/32131)
72SANDBOX_TMP=/mnt/disks/sandbox-$i-tmp
73rm -rf $SANDBOX_TMP
74mkdir -p $SANDBOX_TMP
75mount -t tmpfs tmpfs $SANDBOX_TMP -o mode=777,size=100G
76
77docker rm -f worker-$i 2>/dev/null || true
78docker run -d \
79  -v /var/run/docker.sock:/var/run/docker.sock \
80  -v $ARTIFACTS_DIR:$ARTIFACTS_DIR \
81  --env SHARED_WORKER_CACHE="$SHARED_WORKER_CACHE" \
82  --env SANDBOX_NETWORK_ARGS="$SANDBOX_NETWORK_ARGS" \
83  --env ARTIFACTS_DIR="$ARTIFACTS_DIR" \
84  --env SANDBOX_TMP="$SANDBOX_TMP" \
85  --env WORKER_HOST="$(hostname)" \
86  --name worker-$i \
87  --hostname worker-$i \
88  --log-driver gcplogs \
89  $WORKER_IMG
90done
91
92
93# Register a systemd service to stop worker containers gracefully on shutdown.
94cat > /etc/systemd/system/graceful_shutdown.sh <<EOF
95#!/bin/sh
96logger 'Shutting down worker containers'
97docker ps -q  -f 'name=worker-\d+$' | xargs docker stop -t 120
98exit 0
99EOF
100
101chmod 755 /etc/systemd/system/graceful_shutdown.sh
102
103# This service will cause the graceful_shutdown.sh to be invoked before stopping
104# docker, hence before tearing down any other container.
105cat > /etc/systemd/system/graceful_shutdown.service <<EOF
106[Unit]
107Description=Worker container lifecycle
108Wants=gcr-online.target docker.service
109After=gcr-online.target docker.service
110Requires=docker.service
111
112[Service]
113Type=oneshot
114RemainAfterExit=yes
115ExecStop=/etc/systemd/system/graceful_shutdown.sh
116EOF
117
118systemctl daemon-reload
119systemctl start graceful_shutdown.service