xref: /aosp_15_r20/external/autotest/utils/frozen_chromite/lib/gce.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# -*- coding: utf-8 -*-
2# Copyright 2015 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
"""A convenient wrapper of the GCE Python API.

Public methods in class GceContext raise HttpError when the underlying call to
the Google API fails, or gce.Error on other failures.
"""
11
12from __future__ import print_function
13
14from googleapiclient.discovery import build
15from googleapiclient.errors import HttpError
16from googleapiclient.http import HttpRequest
17import httplib2
18from oauth2client.client import GoogleCredentials
19
20from autotest_lib.utils.frozen_chromite.lib import cros_logging as logging
21from autotest_lib.utils.frozen_chromite.lib import timeout_util
22
23
class Error(Exception):
  """Base exception for this module.

  Raised directly for failures that are not HTTP errors, e.g. when waiting for
  an operation times out or the finished operation reports an error.
  """
26
27
class ResourceNotFoundError(Error):
  """Raised when a requested GCE resource was not found.

  GceContext.GetInstance() and GceContext.GetImage() raise this when the API
  answers with HTTP status 404.
  """
30
31
class RetryOnServerErrorHttpRequest(HttpRequest):
  """An HttpRequest that is automatically retried on server errors."""

  def __init__(self, num_retries, *args, **kwargs):
    """Initializes the request.

    Args:
      num_retries: Default number of retries used by execute().
      *args: Positional arguments forwarded to HttpRequest.
      **kwargs: Keyword arguments forwarded to HttpRequest.
    """
    self.num_retries = num_retries
    super(RetryOnServerErrorHttpRequest, self).__init__(*args, **kwargs)

  def execute(self, http=None, num_retries=None):
    """Executes the request, retrying on server errors.

    HttpRequest.execute() can automatically retry on server errors, i.e., 500
    status codes, when it is called with a non-zero |num_retries|. This
    override supplies the instance default when the caller gives none.

    Args:
      http: The httplib2.http to send this request through.
      num_retries: Number of retries. The value given to the constructor is
          used if omitted.

    Returns:
      A deserialized object model of the response body as determined
          by the postproc. See HttpRequest.execute().
    """
    # NOTE(review): any falsy value, including an explicit 0, falls back to
    # the instance default here.
    retries = num_retries or self.num_retries
    parent = super(RetryOnServerErrorHttpRequest, self)
    return parent.execute(http=http, num_retries=retries)
58
59
60def _GetMetdataValue(metadata, key):
61  """Finds a value corresponding to a given metadata key.
62
63  Args:
64    metadata: metadata object, i.e. a dict containing containing 'items'
65      - a list of key-value pairs.
66    key: name of the key.
67
68  Returns:
69    Corresponding value or None if it was not found.
70  """
71  for item in metadata['items']:
72    if item['key'] == key:
73      return item['value']
74  return None
75
76
77def _UpdateMetadataValue(metadata, key, value):
78  """Updates a single key-value pair in a metadata object.
79
80  Args:
81    metadata: metadata object, i.e. a dict containing containing 'items'
82      - a list of key-value pairs.
83    key: name of the key.
84    value: new value for the key, or None if it should be removed.
85  """
86  items = metadata.setdefault('items', [])
87  for item in items:
88    if item['key'] == key:
89      if value is None:
90        items.remove(item)
91      else:
92        item['value'] = value
93      return
94
95  if value is not None:
96    items.append({
97        'key': key,
98        'value': value,
99    })
100
101
class GceContext(object):
  """A convenient wrapper around the GCE Python API."""

  # These constants are made public so that users can customize as they need.
  # All three are timeouts in seconds used when waiting for GCE operations.
  DEFAULT_TIMEOUT_SEC = 5 * 60
  INSTANCE_OPERATIONS_TIMEOUT_SEC = 10 * 60
  IMAGE_OPERATIONS_TIMEOUT_SEC = 10 * 60

  # OAuth scopes requested for the credentials that call the GCE API.
  _GCE_SCOPES = (
      'https://www.googleapis.com/auth/compute',  # CreateInstance, CreateImage
      'https://www.googleapis.com/auth/devstorage.full_control', # CreateImage
  )
  # Fallbacks used by CreateInstance() when the caller omits the argument.
  _DEFAULT_NETWORK = 'default'
  _DEFAULT_MACHINE_TYPE = 'n1-standard-8'

  # Project default service account and scopes.
  _DEFAULT_SERVICE_ACCOUNT_EMAIL = 'default'
  # The list is in line with what the gcloud cli uses.
  # https://cloud.google.com/sdk/gcloud/reference/compute/instances/create
  _DEFAULT_INSTANCE_SCOPES = [
      'https://www.googleapis.com/auth/cloud.useraccounts.readonly',
      'https://www.googleapis.com/auth/devstorage.read_only',
      'https://www.googleapis.com/auth/logging.write',
  ]

  # This is made public to allow easy customization of the retry behavior.
  # It is the retry count handed to every request built for the API client.
  RETRIES = 2
129
130  def __init__(self, project, zone, credentials, thread_safe=False):
131    """Initializes GceContext.
132
133    Args:
134      project: The GCP project to create instances in.
135      zone: The default zone to create instances in.
136      credentials: The credentials used to call the GCE API.
137      thread_safe: Whether the client is expected to be thread safe.
138    """
139    self.project = project
140    self.zone = zone
141
142    def _BuildRequest(http, *args, **kwargs):
143      """Custom request builder."""
144      return self._BuildRetriableRequest(self.RETRIES, http, thread_safe,
145                                         credentials, *args, **kwargs)
146
147    self.gce_client = build('compute', 'v1', credentials=credentials,
148                            requestBuilder=_BuildRequest)
149
150    self.region = self.GetZoneRegion(zone)
151
152  @classmethod
153  def ForServiceAccount(cls, project, zone, json_key_file):
154    """Creates a GceContext using service account credentials.
155
156    About service account:
157    https://developers.google.com/api-client-library/python/auth/service-accounts
158
159    Args:
160      project: The GCP project to create images and instances in.
161      zone: The default zone to create instances in.
162      json_key_file: Path to the service account JSON key.
163
164    Returns:
165      GceContext.
166    """
167    credentials = GoogleCredentials.from_stream(json_key_file).create_scoped(
168        cls._GCE_SCOPES)
169    return GceContext(project, zone, credentials)
170
171  @classmethod
172  def ForServiceAccountThreadSafe(cls, project, zone, json_key_file):
173    """Creates a thread-safe GceContext using service account credentials.
174
175    About service account:
176    https://developers.google.com/api-client-library/python/auth/service-accounts
177
178    Args:
179      project: The GCP project to create images and instances in.
180      zone: The default zone to create instances in.
181      json_key_file: Path to the service account JSON key.
182
183    Returns:
184      GceContext.
185    """
186    credentials = GoogleCredentials.from_stream(json_key_file).create_scoped(
187        cls._GCE_SCOPES)
188    return GceContext(project, zone, credentials, thread_safe=True)
189
190  def CreateAddress(self, name, region=None):
191    """Reserves an external IP address.
192
193    Args:
194      name: The name to assign to the address.
195      region: Region to reserved the address in.
196
197    Returns:
198      The reserved address as a string.
199    """
200    body = {
201        'name': name,
202    }
203    operation = self.gce_client.addresses().insert(
204        project=self.project,
205        region=region or self.region,
206        body=body).execute()
207    self._WaitForRegionOperation(
208        operation['name'], region,
209        timeout_sec=self.INSTANCE_OPERATIONS_TIMEOUT_SEC)
210
211    address = self.gce_client.addresses().get(
212        project=self.project,
213        region=region or self.region,
214        address=name).execute()
215
216    return address['address']
217
218  def DeleteAddress(self, name, region=None):
219    """Frees up an external IP address.
220
221    Args:
222      name: The name of the address.
223      region: Region of the address.
224    """
225    operation = self.gce_client.addresses().delete(
226        project=self.project,
227        region=region or self.region,
228        address=name).execute()
229    self._WaitForRegionOperation(
230        operation['name'], region=region or self.region,
231        timeout_sec=self.INSTANCE_OPERATIONS_TIMEOUT_SEC)
232
233  def GetZoneRegion(self, zone=None):
234    """Resolves name of the region that a zone belongs to.
235
236    Args:
237      zone: The zone to resolve.
238
239    Returns:
240      Name of the region corresponding to the zone.
241    """
242    zone_resource = self.gce_client.zones().get(
243        project=self.project,
244        zone=zone or self.zone).execute()
245    return zone_resource['region'].split('/')[-1]
246
247  def CreateInstance(self, name, image, zone=None, network=None, subnet=None,
248                     machine_type=None, default_scopes=True,
249                     static_address=None, **kwargs):
250    """Creates an instance with the given image and waits until it's ready.
251
252    Args:
253      name: Instance name.
254      image: Fully spelled URL of the image, e.g., for private images,
255          'global/images/my-private-image', or for images from a
256          publicly-available project,
257          'projects/debian-cloud/global/images/debian-7-wheezy-vYYYYMMDD'.
258          Details:
259          https://cloud.google.com/compute/docs/reference/latest/instances/insert
260      zone: The zone to create the instance in. Default zone will be used if
261          omitted.
262      network: An existing network to create the instance in. Default network
263          will be used if omitted.
264      subnet: The subnet to create the instance in.
265      machine_type: The machine type to use. Default machine type will be used
266          if omitted.
267      default_scopes: If true, the default scopes are added to the instances.
268      static_address: External IP address to assign to the instance as a string.
269          If None an emphemeral address will be used.
270      kwargs: Other possible Instance Resource properties.
271          https://cloud.google.com/compute/docs/reference/latest/instances#resource
272          Note that values from kwargs will overrule properties constructed from
273          positinal arguments, i.e., name, image, zone, network and
274          machine_type.
275
276    Returns:
277      URL to the created instance.
278    """
279    logging.info('Creating instance "%s" with image "%s" ...', name, image)
280    network = 'global/networks/%s' % network or self._DEFAULT_NETWORK
281    machine_type = 'zones/%s/machineTypes/%s' % (
282        zone or self.zone, machine_type or self._DEFAULT_MACHINE_TYPE)
283    service_accounts = (
284        {
285            'email': self._DEFAULT_SERVICE_ACCOUNT_EMAIL,
286            'scopes': self._DEFAULT_INSTANCE_SCOPES,
287        },
288    ) if default_scopes else ()
289
290    config = {
291        'name': name,
292        'machineType': machine_type,
293        'disks': (
294            {
295                'boot': True,
296                'autoDelete': True,
297                'initializeParams': {
298                    'sourceImage': image,
299                },
300            },
301        ),
302        'networkInterfaces': (
303            {
304                'network': network,
305                'accessConfigs': (
306                    {
307                        'type': 'ONE_TO_ONE_NAT',
308                        'name': 'External NAT',
309                    },
310                ),
311            },
312        ),
313        'serviceAccounts' : service_accounts,
314    }
315    config.update(**kwargs)
316    if static_address is not None:
317      config['networkInterfaces'][0]['accessConfigs'][0]['natIP'] = (
318          static_address)
319    if subnet is not None:
320      region = self.GetZoneRegion(zone)
321      config['networkInterfaces'][0]['subnetwork'] = (
322          'regions/%s/subnetworks/%s' % (region, subnet)
323      )
324    operation = self.gce_client.instances().insert(
325        project=self.project,
326        zone=zone or self.zone,
327        body=config).execute()
328    self._WaitForZoneOperation(
329        operation['name'],
330        timeout_sec=self.INSTANCE_OPERATIONS_TIMEOUT_SEC,
331        timeout_handler=lambda: self.DeleteInstance(name))
332    return operation['targetLink']
333
334  def DeleteInstance(self, name, zone=None):
335    """Deletes an instance with the name and waits until it's done.
336
337    Args:
338      name: Name of the instance to delete.
339      zone: Zone where the instance is in. Default zone will be used if omitted.
340    """
341    logging.info('Deleting instance "%s" ...', name)
342    operation = self.gce_client.instances().delete(
343        project=self.project,
344        zone=zone or self.zone,
345        instance=name).execute()
346    self._WaitForZoneOperation(
347        operation['name'], timeout_sec=self.INSTANCE_OPERATIONS_TIMEOUT_SEC)
348
349  def StartInstance(self, name, zone=None):
350    """Starts an instance with the name and waits until it's done.
351
352    Args:
353      name: Name of the instance to start.
354      zone: Zone where the instance is in. Default zone will be used if omitted.
355    """
356    logging.info('Starting instance "%s" ...', name)
357    operation = self.gce_client.instances().start(
358        project=self.project,
359        zone=zone or self.zone,
360        instance=name).execute()
361    self._WaitForZoneOperation(
362        operation['name'], timeout_sec=self.INSTANCE_OPERATIONS_TIMEOUT_SEC)
363
364  def StopInstance(self, name, zone=None):
365    """Stops an instance with the name and waits until it's done.
366
367    Args:
368      name: Name of the instance to stop.
369      zone: Zone where the instance is in. Default zone will be used if omitted.
370    """
371    logging.info('Stopping instance "%s" ...', name)
372    operation = self.gce_client.instances().stop(
373        project=self.project,
374        zone=zone or self.zone,
375        instance=name).execute()
376    self._WaitForZoneOperation(
377        operation['name'], timeout_sec=self.INSTANCE_OPERATIONS_TIMEOUT_SEC)
378
379  def CreateImage(self, name, source):
380    """Creates an image with the given |source|.
381
382    Args:
383      name: Name of the image to be created.
384      source:
385        Google Cloud Storage object of the source disk, e.g.,
386        'https://storage.googleapis.com/my-gcs-bucket/test_image.tar.gz'.
387
388    Returns:
389      URL to the created image.
390    """
391    logging.info('Creating image "%s" with "source" %s ...', name, source)
392    config = {
393        'name': name,
394        'rawDisk': {
395            'source': source,
396        },
397    }
398    operation = self.gce_client.images().insert(
399        project=self.project,
400        body=config).execute()
401    self._WaitForGlobalOperation(operation['name'],
402                                 timeout_sec=self.IMAGE_OPERATIONS_TIMEOUT_SEC,
403                                 timeout_handler=lambda: self.DeleteImage(name))
404    return operation['targetLink']
405
406  def DeleteImage(self, name):
407    """Deletes an image and waits until it's deleted.
408
409    Args:
410      name: Name of the image to delete.
411    """
412    logging.info('Deleting image "%s" ...', name)
413    operation = self.gce_client.images().delete(
414        project=self.project,
415        image=name).execute()
416    self._WaitForGlobalOperation(operation['name'],
417                                 timeout_sec=self.IMAGE_OPERATIONS_TIMEOUT_SEC)
418
419  def ListInstances(self, zone=None):
420    """Lists all instances.
421
422    Args:
423      zone: Zone where the instances are in. Default zone will be used if
424            omitted.
425
426    Returns:
427      A list of Instance Resources if found, or an empty list otherwise.
428    """
429    result = self.gce_client.instances().list(project=self.project,
430                                              zone=zone or self.zone).execute()
431    return result.get('items', [])
432
433  def ListImages(self):
434    """Lists all images.
435
436    Returns:
437      A list of Image Resources if found, or an empty list otherwise.
438    """
439    result = self.gce_client.images().list(project=self.project).execute()
440    return result.get('items', [])
441
442  def GetInstance(self, instance, zone=None):
443    """Gets an Instance Resource by name and zone.
444
445    Args:
446      instance: Name of the instance.
447      zone: Zone where the instance is in. Default zone will be used if omitted.
448
449    Returns:
450      An Instance Resource.
451
452    Raises:
453      ResourceNotFoundError if instance was not found, or HttpError on other
454      HTTP failures.
455    """
456    try:
457      return self.gce_client.instances().get(project=self.project,
458                                             zone=zone or self.zone,
459                                             instance=instance).execute()
460    except HttpError as e:
461      if e.resp.status == 404:
462        raise ResourceNotFoundError(
463            'Instance "%s" for project "%s" in zone "%s" was not found.' %
464            (instance, self.project, zone or self.zone))
465      else:
466        raise
467
468  def GetInstanceIP(self, instance, zone=None):
469    """Gets the external IP of an instance.
470
471    Args:
472      instance: Name of the instance to get IP for.
473      zone: Zone where the instance is in. Default zone will be used if omitted.
474
475    Returns:
476      External IP address of the instance.
477
478    Raises:
479      Error: Something went wrong when trying to get IP for the instance.
480    """
481    result = self.GetInstance(instance, zone)
482    try:
483      return result['networkInterfaces'][0]['accessConfigs'][0]['natIP']
484    except (KeyError, IndexError):
485      raise Error('Failed to get IP address for instance %s' % instance)
486
487  def GetInstanceInternalIP(self, instance, zone=None):
488    """Gets the internal IP of an instance."""
489    result = self.GetInstance(instance, zone)
490    try:
491      return result['networkInterfaces'][0]['networkIP']
492    except (KeyError, IndexError):
493      raise Error('Failed to get internal IP for instance %s' % instance)
494
495  def GetImage(self, image):
496    """Gets an Image Resource by name.
497
498    Args:
499      image: Name of the image to look for.
500
501    Returns:
502      An Image Resource.
503
504    Raises:
505      ResourceNotFoundError: The requested image was not found.
506    """
507    try:
508      return self.gce_client.images().get(project=self.project,
509                                          image=image).execute()
510    except HttpError as e:
511      if e.resp.status == 404:
512        raise ResourceNotFoundError('Image "%s" for project "%s" was not found.'
513                                    % (image, self.project))
514      else:
515        raise
516
517  def InstanceExists(self, instance, zone=None):
518    """Checks if an instance exists in the current project.
519
520    Args:
521      instance: Name of the instance to check existence of.
522      zone: Zone where the instance is in. Default zone will be used if omitted.
523
524    Returns:
525      True if the instance exists or False otherwise.
526    """
527    try:
528      return self.GetInstance(instance, zone) is not None
529    except ResourceNotFoundError:
530      return False
531
532  def ImageExists(self, image):
533    """Checks if an image exists in the current project.
534
535    Args:
536      image: Name of the image to check existence of.
537
538    Returns:
539      True if the instance exists or False otherwise.
540    """
541    try:
542      return self.GetImage(image) is not None
543    except ResourceNotFoundError:
544      return False
545
546  def GetCommonInstanceMetadata(self, key):
547    """Looks up a single project metadata value.
548
549    Args:
550      key: Metadata key name.
551
552    Returns:
553      Metadata value corresponding to the key, or None if it was not found.
554    """
555    projects_data = self.gce_client.projects().get(
556        project=self.project).execute()
557    metadata = projects_data['commonInstanceMetadata']
558    return _GetMetdataValue(metadata, key)
559
560  def SetCommonInstanceMetadata(self, key, value):
561    """Sets a single project metadata value.
562
563    Args:
564      key: Metadata key to be set.
565      value: New value, or None if the given key should be removed.
566    """
567    projects_data = self.gce_client.projects().get(
568        project=self.project).execute()
569    metadata = projects_data['commonInstanceMetadata']
570    _UpdateMetadataValue(metadata, key, value)
571    operation = self.gce_client.projects().setCommonInstanceMetadata(
572        project=self.project,
573        body=metadata).execute()
574    self._WaitForGlobalOperation(operation['name'])
575
576  def GetInstanceMetadata(self, instance, key):
577    """Looks up instance's metadata value.
578
579    Args:
580      instance: Name of the instance.
581      key: Metadata key name.
582
583    Returns:
584      Metadata value corresponding to the key, or None if it was not found.
585    """
586    instance_data = self.GetInstance(instance)
587    metadata = instance_data['metadata']
588    return self._GetMetdataValue(metadata, key)
589
590  def SetInstanceMetadata(self, instance, key, value):
591    """Sets a single instance metadata value.
592
593    Args:
594      instance: Name of the instance.
595      key: Metadata key to be set.
596      value: New value, or None if the given key should be removed.
597    """
598    instance_data = self.GetInstance(instance)
599    metadata = instance_data['metadata']
600    _UpdateMetadataValue(metadata, key, value)
601    operation = self.gce_client.instances().setMetadata(
602        project=self.project,
603        zone=self.zone,
604        instance=instance,
605        body=metadata).execute()
606    self._WaitForZoneOperation(operation['name'])
607
608  def _WaitForZoneOperation(self, operation, zone=None, timeout_sec=None,
609                            timeout_handler=None):
610    """Waits until a GCE ZoneOperation is finished or timed out.
611
612    Args:
613      operation: The GCE operation to wait for.
614      zone: The zone that |operation| belongs to.
615      timeout_sec: The maximum number of seconds to wait for.
616      timeout_handler: A callable to be executed when timeout happens.
617
618    Raises:
619      Error when timeout happens or the operation fails.
620    """
621    get_request = self.gce_client.zoneOperations().get(
622        project=self.project, zone=zone or self.zone, operation=operation)
623    self._WaitForOperation(operation, get_request, timeout_sec,
624                           timeout_handler=timeout_handler)
625
626  def _WaitForRegionOperation(self, operation, region, timeout_sec=None,
627                              timeout_handler=None):
628    """Waits until a GCE RegionOperation is finished or timed out.
629
630    Args:
631      operation: The GCE operation to wait for.
632      region: The region that |operation| belongs to.
633      timeout_sec: The maximum number of seconds to wait for.
634      timeout_handler: A callable to be executed when timeout happens.
635
636    Raises:
637      Error when timeout happens or the operation fails.
638    """
639    get_request = self.gce_client.regionOperations().get(
640        project=self.project, region=region or self.region, operation=operation)
641    self._WaitForOperation(operation, get_request, timeout_sec,
642                           timeout_handler=timeout_handler)
643
644  def _WaitForGlobalOperation(self, operation, timeout_sec=None,
645                              timeout_handler=None):
646    """Waits until a GCE GlobalOperation is finished or timed out.
647
648    Args:
649      operation: The GCE operation to wait for.
650      timeout_sec: The maximum number of seconds to wait for.
651      timeout_handler: A callable to be executed when timeout happens.
652
653    Raises:
654      Error when timeout happens or the operation fails.
655    """
656    get_request = self.gce_client.globalOperations().get(project=self.project,
657                                                         operation=operation)
658    self._WaitForOperation(operation, get_request, timeout_sec=timeout_sec,
659                           timeout_handler=timeout_handler)
660
661  def _WaitForOperation(self, operation, get_operation_request,
662                        timeout_sec=None, timeout_handler=None):
663    """Waits until timeout or the request gets a response with a 'DONE' status.
664
665    Args:
666      operation: The GCE operation to wait for.
667      get_operation_request:
668        The HTTP request to get the operation's status.
669        This request will be executed periodically until it returns a status
670        'DONE'.
671      timeout_sec: The maximum number of seconds to wait for.
672      timeout_handler: A callable to be executed when times out.
673
674    Raises:
675      Error when timeout happens or the operation fails.
676    """
677    def _IsDone():
678      result = get_operation_request.execute()
679      if result['status'] == 'DONE':
680        if 'error' in result:
681          raise Error(result['error'])
682        return True
683      return False
684
685    try:
686      timeout = timeout_sec or self.DEFAULT_TIMEOUT_SEC
687      logging.info('Waiting up to %d seconds for operation [%s] to complete...',
688                   timeout, operation)
689      timeout_util.WaitForReturnTrue(_IsDone, timeout, period=1)
690    except timeout_util.TimeoutError:
691      if timeout_handler:
692        timeout_handler()
693      raise Error('Timeout wating for operation [%s] to complete' % operation)
694
695  def _BuildRetriableRequest(self, num_retries, http, thread_safe=False,
696                             credentials=None, *args, **kwargs):
697    """Builds a request that will be automatically retried on server errors.
698
699    Args:
700      num_retries: The maximum number of times to retry until give up.
701      http: An httplib2.Http object that this request will be executed through.
702      thread_safe: Whether or not the request needs to be thread-safe.
703      credentials: Credentials to apply to the request.
704      *args: Optional positional arguments.
705      **kwargs: Optional keyword arguments.
706
707    Returns:
708      RetryOnServerErrorHttpRequest: A request that will automatically retried
709          on server errors.
710    """
711    if thread_safe:
712      # Create a new http object for every request.
713      http = credentials.authorize(httplib2.Http())
714    return RetryOnServerErrorHttpRequest(num_retries, http, *args, **kwargs)
715