1*9c5db199SXin Li# -*- coding: utf-8 -*- 2*9c5db199SXin Li# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be 4*9c5db199SXin Li# found in the LICENSE file. 5*9c5db199SXin Li 6*9c5db199SXin Li"""Library to make common google storage operations more reliable.""" 7*9c5db199SXin Li 8*9c5db199SXin Lifrom __future__ import print_function 9*9c5db199SXin Li 10*9c5db199SXin Liimport collections 11*9c5db199SXin Liimport contextlib 12*9c5db199SXin Liimport datetime 13*9c5db199SXin Liimport errno 14*9c5db199SXin Liimport fnmatch 15*9c5db199SXin Liimport getpass 16*9c5db199SXin Liimport glob 17*9c5db199SXin Liimport hashlib 18*9c5db199SXin Liimport os 19*9c5db199SXin Liimport re 20*9c5db199SXin Liimport shutil 21*9c5db199SXin Liimport subprocess 22*9c5db199SXin Liimport tempfile 23*9c5db199SXin Li 24*9c5db199SXin Liimport six 25*9c5db199SXin Lifrom six.moves import urllib 26*9c5db199SXin Li 27*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import constants 28*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cache 29*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cros_build_lib 30*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cros_collections 31*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cros_logging as logging 32*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import osutils 33*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import path_util 34*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import retry_stats 35*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import retry_util 36*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import signals 37*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import timeout_util 38*9c5db199SXin Li 39*9c5db199SXin Li 40*9c5db199SXin Li# This bucket has the allAuthenticatedUsers:READER ACL. 41*9c5db199SXin LiAUTHENTICATION_BUCKET = 'gs://chromeos-authentication-bucket/' 42*9c5db199SXin Li 43*9c5db199SXin Li# Public path, only really works for files. 44*9c5db199SXin LiPUBLIC_BASE_HTTPS_URL = 'https://storage.googleapis.com/' 45*9c5db199SXin Li 46*9c5db199SXin Li# Private path for files. 47*9c5db199SXin LiPRIVATE_BASE_HTTPS_URL = 'https://storage.cloud.google.com/' 48*9c5db199SXin Li 49*9c5db199SXin Li# Private path for directories. 50*9c5db199SXin Li# TODO(akeshet): this is a workaround for b/27653354. If that is ultimately 51*9c5db199SXin Li# fixed, revisit this workaround. 52*9c5db199SXin LiPRIVATE_BASE_HTTPS_DOWNLOAD_URL = 'https://stainless.corp.google.com/browse/' 53*9c5db199SXin LiBASE_GS_URL = 'gs://' 54*9c5db199SXin Li 55*9c5db199SXin Li# Format used by "gsutil ls -l" when reporting modified time. 56*9c5db199SXin LiDATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ' 57*9c5db199SXin Li 58*9c5db199SXin Li# Regexp for parsing each line of output from "gsutil ls -l". 59*9c5db199SXin Li# This regexp is prepared for the generation and meta_generation values, 60*9c5db199SXin Li# too, even though they are not expected until we use "-a". 61*9c5db199SXin Li# 62*9c5db199SXin Li# A detailed listing looks like: 63*9c5db199SXin Li# 99908 2014-03-01T05:50:08Z gs://bucket/foo/abc#1234 metageneration=1 64*9c5db199SXin Li# gs://bucket/foo/adir/ 65*9c5db199SXin Li# 99908 2014-03-04T01:16:55Z gs://bucket/foo/def#5678 metageneration=1 66*9c5db199SXin Li# TOTAL: 2 objects, 199816 bytes (495.36 KB) 67*9c5db199SXin LiLS_LA_RE = re.compile( 68*9c5db199SXin Li r'^\s*(?P<content_length>\d*?)\s+' 69*9c5db199SXin Li r'(?P<creation_time>\S*?)\s+' 70*9c5db199SXin Li r'(?P<url>[^#$]+).*?' 71*9c5db199SXin Li r'(' 72*9c5db199SXin Li r'#(?P<generation>\d+)\s+' 73*9c5db199SXin Li r'meta_?generation=(?P<metageneration>\d+)' 74*9c5db199SXin Li r')?\s*$') 75*9c5db199SXin LiLS_RE = re.compile(r'^\s*(?P<content_length>)(?P<creation_time>)(?P<url>.*)' 76*9c5db199SXin Li r'(?P<generation>)(?P<metageneration>)\s*$') 77*9c5db199SXin Li 78*9c5db199SXin Li# Format used by ContainsWildCard, which is duplicated from 79*9c5db199SXin Li# https://github.com/GoogleCloudPlatform/gsutil/blob/v4.21/gslib/storage_url.py#L307. 80*9c5db199SXin LiWILDCARD_REGEX = re.compile(r'[*?\[\]]') 81*9c5db199SXin Li 82*9c5db199SXin Li 83*9c5db199SXin Lidef PathIsGs(path): 84*9c5db199SXin Li """Determine if a path is a Google Storage URI.""" 85*9c5db199SXin Li return path.startswith(BASE_GS_URL) 86*9c5db199SXin Li 87*9c5db199SXin Li 88*9c5db199SXin Lidef CanonicalizeURL(url, strict=False): 89*9c5db199SXin Li """Convert provided URL to gs:// URL, if it follows a known format. 90*9c5db199SXin Li 91*9c5db199SXin Li Args: 92*9c5db199SXin Li url: URL to canonicalize. 93*9c5db199SXin Li strict: Raises exception if URL cannot be canonicalized. 94*9c5db199SXin Li """ 95*9c5db199SXin Li for prefix in (PUBLIC_BASE_HTTPS_URL, 96*9c5db199SXin Li PRIVATE_BASE_HTTPS_URL, 97*9c5db199SXin Li PRIVATE_BASE_HTTPS_DOWNLOAD_URL, 98*9c5db199SXin Li 'https://pantheon.corp.google.com/storage/browser/', 99*9c5db199SXin Li 'https://commondatastorage.googleapis.com/'): 100*9c5db199SXin Li if url.startswith(prefix): 101*9c5db199SXin Li return url.replace(prefix, BASE_GS_URL, 1) 102*9c5db199SXin Li 103*9c5db199SXin Li if not PathIsGs(url) and strict: 104*9c5db199SXin Li raise ValueError('Url %r cannot be canonicalized.' % url) 105*9c5db199SXin Li 106*9c5db199SXin Li return url 107*9c5db199SXin Li 108*9c5db199SXin Li 109*9c5db199SXin Lidef GetGsURL(bucket, for_gsutil=False, public=True, suburl=''): 110*9c5db199SXin Li """Construct a Google Storage URL 111*9c5db199SXin Li 112*9c5db199SXin Li Args: 113*9c5db199SXin Li bucket: The Google Storage bucket to use 114*9c5db199SXin Li for_gsutil: Do you want a URL for passing to `gsutil`? 115*9c5db199SXin Li public: Do we want the public or private url 116*9c5db199SXin Li suburl: A url fragment to tack onto the end 117*9c5db199SXin Li 118*9c5db199SXin Li Returns: 119*9c5db199SXin Li The fully constructed URL 120*9c5db199SXin Li """ 121*9c5db199SXin Li url = 'gs://%s/%s' % (bucket, suburl) 122*9c5db199SXin Li 123*9c5db199SXin Li if for_gsutil: 124*9c5db199SXin Li return url 125*9c5db199SXin Li else: 126*9c5db199SXin Li return GsUrlToHttp(url, public=public) 127*9c5db199SXin Li 128*9c5db199SXin Li 129*9c5db199SXin Lidef GsUrlToHttp(path, public=True, directory=False): 130*9c5db199SXin Li """Convert a GS URL to a HTTP URL for the same resource. 131*9c5db199SXin Li 132*9c5db199SXin Li Because the HTTP Urls are not fixed (and may not always be simple prefix 133*9c5db199SXin Li replacements), use this method to centralize the conversion. 134*9c5db199SXin Li 135*9c5db199SXin Li Directories need to have different URLs from files, because the Web UIs for GS 136*9c5db199SXin Li are weird and really inconsistent. Also public directories probably 137*9c5db199SXin Li don't work, and probably never will (permissions as well as UI). 138*9c5db199SXin Li 139*9c5db199SXin Li e.g. 'gs://chromeos-image-archive/path/file' -> 140*9c5db199SXin Li 'https://pantheon/path/file' 141*9c5db199SXin Li 142*9c5db199SXin Li Args: 143*9c5db199SXin Li path: GS URL to convert. 144*9c5db199SXin Li public: Is this URL for Googler access, or publicly visible? 145*9c5db199SXin Li directory: Force this URL to be treated as a directory? 146*9c5db199SXin Li We try to autodetect on False. 147*9c5db199SXin Li 148*9c5db199SXin Li Returns: 149*9c5db199SXin Li https URL as a string. 150*9c5db199SXin Li """ 151*9c5db199SXin Li assert PathIsGs(path) 152*9c5db199SXin Li directory = directory or path.endswith('/') 153*9c5db199SXin Li 154*9c5db199SXin Li # Public HTTP URls for directories don't work' 155*9c5db199SXin Li # assert not public or not directory, 156*9c5db199SXin Li 157*9c5db199SXin Li if public: 158*9c5db199SXin Li return path.replace(BASE_GS_URL, PUBLIC_BASE_HTTPS_URL, 1) 159*9c5db199SXin Li else: 160*9c5db199SXin Li if directory: 161*9c5db199SXin Li return path.replace(BASE_GS_URL, PRIVATE_BASE_HTTPS_DOWNLOAD_URL, 1) 162*9c5db199SXin Li else: 163*9c5db199SXin Li return path.replace(BASE_GS_URL, PRIVATE_BASE_HTTPS_URL, 1) 164*9c5db199SXin Li 165*9c5db199SXin Li 166*9c5db199SXin Liclass GSContextException(Exception): 167*9c5db199SXin Li """Base exception for all exceptions thrown by GSContext.""" 168*9c5db199SXin Li 169*9c5db199SXin Li 170*9c5db199SXin Li# Since the underlying code uses run, some callers might be trying to 171*9c5db199SXin Li# catch cros_build_lib.RunCommandError themselves. Extend that class so that 172*9c5db199SXin Li# code continues to work. 173*9c5db199SXin Liclass GSCommandError(GSContextException, cros_build_lib.RunCommandError): 174*9c5db199SXin Li """Thrown when an error happened we couldn't decode.""" 175*9c5db199SXin Li 176*9c5db199SXin Li 177*9c5db199SXin Liclass GSContextPreconditionFailed(GSContextException): 178*9c5db199SXin Li """Thrown when google storage returns code=PreconditionFailed.""" 179*9c5db199SXin Li 180*9c5db199SXin Li 181*9c5db199SXin Liclass GSNoSuchKey(GSContextException): 182*9c5db199SXin Li """Thrown when google storage returns code=NoSuchKey.""" 183*9c5db199SXin Li 184*9c5db199SXin Li 185*9c5db199SXin Li# Detailed results of GSContext.Stat. 186*9c5db199SXin Li# 187*9c5db199SXin Li# The fields directory correspond to gsutil stat results. 188*9c5db199SXin Li# 189*9c5db199SXin Li# Field name Type Example 190*9c5db199SXin Li# creation_time datetime Sat, 23 Aug 2014 06:53:20 GMT 191*9c5db199SXin Li# content_length int 74 192*9c5db199SXin Li# content_type string application/octet-stream 193*9c5db199SXin Li# hash_crc32c string BBPMPA== 194*9c5db199SXin Li# hash_md5 string ms+qSYvgI9SjXn8tW/5UpQ== 195*9c5db199SXin Li# etag string CNCgocbmqMACEAE= 196*9c5db199SXin Li# generation int 1408776800850000 197*9c5db199SXin Li# metageneration int 1 198*9c5db199SXin Li# 199*9c5db199SXin Li# Note: We omit a few stat fields as they are not always available, and we 200*9c5db199SXin Li# have no callers that want this currently. 201*9c5db199SXin Li# 202*9c5db199SXin Li# content_language string/None en # This field may be None. 203*9c5db199SXin LiGSStatResult = collections.namedtuple( 204*9c5db199SXin Li 'GSStatResult', 205*9c5db199SXin Li ('creation_time', 'content_length', 'content_type', 'hash_crc32c', 206*9c5db199SXin Li 'hash_md5', 'etag', 'generation', 'metageneration')) 207*9c5db199SXin Li 208*9c5db199SXin Li 209*9c5db199SXin Li# Detailed results of GSContext.List. 210*9c5db199SXin LiGSListResult = collections.namedtuple( 211*9c5db199SXin Li 'GSListResult', 212*9c5db199SXin Li ('url', 'creation_time', 'content_length', 'generation', 'metageneration')) 213*9c5db199SXin Li 214*9c5db199SXin Li 215*9c5db199SXin LiErrorDetails = cros_collections.Collection( 216*9c5db199SXin Li 'ErrorDetails', 217*9c5db199SXin Li type=None, message_pattern='', retriable=None, exception=None) 218*9c5db199SXin Li 219*9c5db199SXin Li 220*9c5db199SXin Liclass GSCounter(object): 221*9c5db199SXin Li """A counter class for Google Storage.""" 222*9c5db199SXin Li 223*9c5db199SXin Li def __init__(self, ctx, path): 224*9c5db199SXin Li """Create a counter object. 225*9c5db199SXin Li 226*9c5db199SXin Li Args: 227*9c5db199SXin Li ctx: A GSContext object. 228*9c5db199SXin Li path: The path to the counter in Google Storage. 229*9c5db199SXin Li """ 230*9c5db199SXin Li self.ctx = ctx 231*9c5db199SXin Li self.path = path 232*9c5db199SXin Li 233*9c5db199SXin Li def Get(self): 234*9c5db199SXin Li """Get the current value of a counter.""" 235*9c5db199SXin Li try: 236*9c5db199SXin Li return int(self.ctx.Cat(self.path)) 237*9c5db199SXin Li except GSNoSuchKey: 238*9c5db199SXin Li return 0 239*9c5db199SXin Li 240*9c5db199SXin Li def AtomicCounterOperation(self, default_value, operation): 241*9c5db199SXin Li """Atomically set the counter value using |operation|. 242*9c5db199SXin Li 243*9c5db199SXin Li Args: 244*9c5db199SXin Li default_value: Default value to use for counter, if counter 245*9c5db199SXin Li does not exist. 246*9c5db199SXin Li operation: Function that takes the current counter value as a 247*9c5db199SXin Li parameter, and returns the new desired value. 248*9c5db199SXin Li 249*9c5db199SXin Li Returns: 250*9c5db199SXin Li The new counter value. None if value could not be set. 251*9c5db199SXin Li """ 252*9c5db199SXin Li generation, _ = self.ctx.GetGeneration(self.path) 253*9c5db199SXin Li for _ in range(self.ctx.retries + 1): 254*9c5db199SXin Li try: 255*9c5db199SXin Li value = default_value if generation == 0 else operation(self.Get()) 256*9c5db199SXin Li self.ctx.Copy('-', self.path, input=str(value), version=generation) 257*9c5db199SXin Li return value 258*9c5db199SXin Li except (GSContextPreconditionFailed, GSNoSuchKey): 259*9c5db199SXin Li # GSContextPreconditionFailed is thrown if another builder is also 260*9c5db199SXin Li # trying to update the counter and we lost the race. GSNoSuchKey is 261*9c5db199SXin Li # thrown if another builder deleted the counter. In either case, fetch 262*9c5db199SXin Li # the generation again, and, if it has changed, try the copy again. 263*9c5db199SXin Li new_generation, _ = self.ctx.GetGeneration(self.path) 264*9c5db199SXin Li if new_generation == generation: 265*9c5db199SXin Li raise 266*9c5db199SXin Li generation = new_generation 267*9c5db199SXin Li 268*9c5db199SXin Li def Increment(self): 269*9c5db199SXin Li """Increment the counter. 270*9c5db199SXin Li 271*9c5db199SXin Li Returns: 272*9c5db199SXin Li The new counter value. None if value could not be set. 273*9c5db199SXin Li """ 274*9c5db199SXin Li return self.AtomicCounterOperation(1, lambda x: x + 1) 275*9c5db199SXin Li 276*9c5db199SXin Li def Decrement(self): 277*9c5db199SXin Li """Decrement the counter. 278*9c5db199SXin Li 279*9c5db199SXin Li Returns: 280*9c5db199SXin Li The new counter value. None if value could not be set. 281*9c5db199SXin Li """ 282*9c5db199SXin Li return self.AtomicCounterOperation(-1, lambda x: x - 1) 283*9c5db199SXin Li 284*9c5db199SXin Li def Reset(self): 285*9c5db199SXin Li """Reset the counter to zero. 286*9c5db199SXin Li 287*9c5db199SXin Li Returns: 288*9c5db199SXin Li The new counter value. None if value could not be set. 289*9c5db199SXin Li """ 290*9c5db199SXin Li return self.AtomicCounterOperation(0, lambda x: 0) 291*9c5db199SXin Li 292*9c5db199SXin Li def StreakIncrement(self): 293*9c5db199SXin Li """Increment the counter if it is positive, otherwise set it to 1. 294*9c5db199SXin Li 295*9c5db199SXin Li Returns: 296*9c5db199SXin Li The new counter value. None if value could not be set. 297*9c5db199SXin Li """ 298*9c5db199SXin Li return self.AtomicCounterOperation(1, lambda x: x + 1 if x > 0 else 1) 299*9c5db199SXin Li 300*9c5db199SXin Li def StreakDecrement(self): 301*9c5db199SXin Li """Decrement the counter if it is negative, otherwise set it to -1. 302*9c5db199SXin Li 303*9c5db199SXin Li Returns: 304*9c5db199SXin Li The new counter value. None if value could not be set. 305*9c5db199SXin Li """ 306*9c5db199SXin Li return self.AtomicCounterOperation(-1, lambda x: x - 1 if x < 0 else -1) 307*9c5db199SXin Li 308*9c5db199SXin Li 309*9c5db199SXin Liclass GSContext(object): 310*9c5db199SXin Li """A class to wrap common google storage operations.""" 311*9c5db199SXin Li 312*9c5db199SXin Li # Error messages that indicate an invalid BOTO config. 313*9c5db199SXin Li AUTHORIZATION_ERRORS = ('no configured', 'none configured', 314*9c5db199SXin Li 'detail=Authorization', '401 Anonymous caller') 315*9c5db199SXin Li 316*9c5db199SXin Li DEFAULT_BOTO_FILE = os.path.expanduser('~/.boto') 317*9c5db199SXin Li DEFAULT_GSUTIL_TRACKER_DIR = os.path.expanduser('~/.gsutil/tracker-files') 318*9c5db199SXin Li # This is set for ease of testing. 319*9c5db199SXin Li DEFAULT_GSUTIL_BIN = None 320*9c5db199SXin Li DEFAULT_GSUTIL_BUILDER_BIN = '/b/build/third_party/gsutil/gsutil' 321*9c5db199SXin Li # How many times to retry uploads. 322*9c5db199SXin Li DEFAULT_RETRIES = 3 323*9c5db199SXin Li 324*9c5db199SXin Li # Multiplier for how long to sleep (in seconds) between retries; will delay 325*9c5db199SXin Li # (1*sleep) the first time, then (2*sleep), continuing via attempt * sleep. 326*9c5db199SXin Li DEFAULT_SLEEP_TIME = 60 327*9c5db199SXin Li 328*9c5db199SXin Li GSUTIL_VERSION = '4.51' 329*9c5db199SXin Li GSUTIL_TAR = 'gsutil_%s.tar.gz' % GSUTIL_VERSION 330*9c5db199SXin Li GSUTIL_URL = (PUBLIC_BASE_HTTPS_URL + 331*9c5db199SXin Li 'chromeos-mirror/gentoo/distfiles/%s' % GSUTIL_TAR) 332*9c5db199SXin Li GSUTIL_API_SELECTOR = 'JSON' 333*9c5db199SXin Li 334*9c5db199SXin Li RESUMABLE_UPLOAD_ERROR = (b'Too many resumable upload attempts failed ' 335*9c5db199SXin Li b'without progress') 336*9c5db199SXin Li RESUMABLE_DOWNLOAD_ERROR = (b'Too many resumable download attempts failed ' 337*9c5db199SXin Li b'without progress') 338*9c5db199SXin Li 339*9c5db199SXin Li # TODO: Below is a list of known flaky errors that we should 340*9c5db199SXin Li # retry. The list needs to be extended. 341*9c5db199SXin Li RESUMABLE_ERROR_MESSAGE = ( 342*9c5db199SXin Li RESUMABLE_DOWNLOAD_ERROR, 343*9c5db199SXin Li RESUMABLE_UPLOAD_ERROR, 344*9c5db199SXin Li b'ResumableUploadException', 345*9c5db199SXin Li b'ResumableUploadAbortException', 346*9c5db199SXin Li b'ResumableDownloadException', 347*9c5db199SXin Li b'ssl.SSLError: The read operation timed out', 348*9c5db199SXin Li # TODO: Error messages may change in different library versions, 349*9c5db199SXin Li # use regexes to match resumable error messages. 350*9c5db199SXin Li b"ssl.SSLError: ('The read operation timed out',)", 351*9c5db199SXin Li b'ssl.SSLError: _ssl.c:495: The handshake operation timed out', 352*9c5db199SXin Li b'Unable to find the server', 353*9c5db199SXin Li b"doesn't match cloud-supplied digest", 354*9c5db199SXin Li b'ssl.SSLError: [Errno 8]', 355*9c5db199SXin Li b'EOF occurred in violation of protocol', 356*9c5db199SXin Li # TODO(nxia): crbug.com/775330 narrow down the criteria for retrying 357*9c5db199SXin Li b'AccessDeniedException', 358*9c5db199SXin Li ) 359*9c5db199SXin Li 360*9c5db199SXin Li # We have seen flaky errors with 5xx return codes 361*9c5db199SXin Li # See b/17376491 for the "JSON decoding" error. 362*9c5db199SXin Li # We have seen transient Oauth 2.0 credential errors (crbug.com/414345). 363*9c5db199SXin Li TRANSIENT_ERROR_MESSAGE = ( 364*9c5db199SXin Li b'ServiceException: 5', 365*9c5db199SXin Li b'Failure: No JSON object could be decoded', 366*9c5db199SXin Li b'Oauth 2.0 User Account', 367*9c5db199SXin Li b'InvalidAccessKeyId', 368*9c5db199SXin Li b'socket.error: [Errno 104] Connection reset by peer', 369*9c5db199SXin Li b'Received bad request from server', 370*9c5db199SXin Li b"can't start new thread", 371*9c5db199SXin Li ) 372*9c5db199SXin Li 373*9c5db199SXin Li @classmethod 374*9c5db199SXin Li def GetDefaultGSUtilBin(cls, cache_dir=None, cache_user=None): 375*9c5db199SXin Li if cls.DEFAULT_GSUTIL_BIN is None: 376*9c5db199SXin Li if cache_dir is None: 377*9c5db199SXin Li cache_dir = path_util.GetCacheDir() 378*9c5db199SXin Li if cache_dir is not None: 379*9c5db199SXin Li common_path = os.path.join(cache_dir, constants.COMMON_CACHE) 380*9c5db199SXin Li tar_cache = cache.TarballCache(common_path, cache_user=cache_user) 381*9c5db199SXin Li key = (cls.GSUTIL_TAR,) 382*9c5db199SXin Li # The common cache will not be LRU, removing the need to hold a read 383*9c5db199SXin Li # lock on the cached gsutil. 384*9c5db199SXin Li ref = tar_cache.Lookup(key) 385*9c5db199SXin Li ref.SetDefault(cls.GSUTIL_URL) 386*9c5db199SXin Li cls.DEFAULT_GSUTIL_BIN = os.path.join(ref.path, 'gsutil', 'gsutil') 387*9c5db199SXin Li cls._CompileCrcmod(ref.path) 388*9c5db199SXin Li else: 389*9c5db199SXin Li # Check if the default gsutil path for builders exists. If 390*9c5db199SXin Li # not, try locating gsutil. If none exists, simply use 'gsutil'. 391*9c5db199SXin Li gsutil_bin = cls.DEFAULT_GSUTIL_BUILDER_BIN 392*9c5db199SXin Li if not os.path.exists(gsutil_bin): 393*9c5db199SXin Li gsutil_bin = osutils.Which('gsutil') 394*9c5db199SXin Li if gsutil_bin is None: 395*9c5db199SXin Li gsutil_bin = 'gsutil' 396*9c5db199SXin Li cls.DEFAULT_GSUTIL_BIN = gsutil_bin 397*9c5db199SXin Li 398*9c5db199SXin Li return cls.DEFAULT_GSUTIL_BIN 399*9c5db199SXin Li 400*9c5db199SXin Li @classmethod 401*9c5db199SXin Li def _CompileCrcmod(cls, path): 402*9c5db199SXin Li """Try to setup a compiled crcmod for gsutil. 403*9c5db199SXin Li 404*9c5db199SXin Li The native crcmod code is much faster than the python implementation, and 405*9c5db199SXin Li enables some more features (otherwise gsutil internally disables them). 406*9c5db199SXin Li Try to compile the module on demand in the crcmod tree bundled with gsutil. 407*9c5db199SXin Li 408*9c5db199SXin Li For more details, see: 409*9c5db199SXin Li https://cloud.google.com/storage/docs/gsutil/addlhelp/CRC32CandInstallingcrcmod 410*9c5db199SXin Li """ 411*9c5db199SXin Li src_root = os.path.join(path, 'gsutil', 'third_party', 'crcmod') 412*9c5db199SXin Li 413*9c5db199SXin Li # Try to build it once. 414*9c5db199SXin Li flag = os.path.join(src_root, '.chromite.tried.build') 415*9c5db199SXin Li if os.path.exists(flag): 416*9c5db199SXin Li return 417*9c5db199SXin Li # Flag things now regardless of how the attempt below works out. 418*9c5db199SXin Li try: 419*9c5db199SXin Li osutils.Touch(flag) 420*9c5db199SXin Li except IOError as e: 421*9c5db199SXin Li # If the gsutil dir was cached previously as root, but now we're 422*9c5db199SXin Li # non-root, just flag it and return. 423*9c5db199SXin Li if e.errno == errno.EACCES: 424*9c5db199SXin Li logging.debug('Skipping gsutil crcmod compile due to permissions') 425*9c5db199SXin Li cros_build_lib.sudo_run(['touch', flag], debug_level=logging.DEBUG) 426*9c5db199SXin Li return 427*9c5db199SXin Li else: 428*9c5db199SXin Li raise 429*9c5db199SXin Li 430*9c5db199SXin Li # See if the system includes one in which case we're done. 431*9c5db199SXin Li # We probe `python` as that's what gsutil uses for its shebang. 432*9c5db199SXin Li result = cros_build_lib.run( 433*9c5db199SXin Li ['python', '-c', 'from crcmod.crcmod import _usingExtension; ' 434*9c5db199SXin Li 'exit(0 if _usingExtension else 1)'], check=False, capture_output=True) 435*9c5db199SXin Li if result.returncode == 0: 436*9c5db199SXin Li return 437*9c5db199SXin Li 438*9c5db199SXin Li # See if the local copy has one. 439*9c5db199SXin Li for pyver in ('python2', 'python3'): 440*9c5db199SXin Li logging.debug('Attempting to compile local crcmod for %s gsutil', pyver) 441*9c5db199SXin Li with osutils.TempDir(prefix='chromite.gsutil.crcmod') as tempdir: 442*9c5db199SXin Li result = cros_build_lib.run( 443*9c5db199SXin Li [pyver, 'setup.py', 'build', '--build-base', tempdir, 444*9c5db199SXin Li '--build-platlib', tempdir], 445*9c5db199SXin Li cwd=src_root, capture_output=True, check=False, 446*9c5db199SXin Li debug_level=logging.DEBUG) 447*9c5db199SXin Li if result.returncode: 448*9c5db199SXin Li continue 449*9c5db199SXin Li 450*9c5db199SXin Li # Locate the module in the build dir. 451*9c5db199SXin Li copied = False 452*9c5db199SXin Li for mod_path in glob.glob( 453*9c5db199SXin Li os.path.join(tempdir, 'crcmod', '_crcfunext*.so')): 454*9c5db199SXin Li dst_mod_path = os.path.join(src_root, pyver, 'crcmod', 455*9c5db199SXin Li os.path.basename(mod_path)) 456*9c5db199SXin Li try: 457*9c5db199SXin Li shutil.copy2(mod_path, dst_mod_path) 458*9c5db199SXin Li copied = True 459*9c5db199SXin Li except shutil.Error: 460*9c5db199SXin Li pass 461*9c5db199SXin Li 462*9c5db199SXin Li if not copied: 463*9c5db199SXin Li # If the module compile failed (missing compiler/headers/whatever), 464*9c5db199SXin Li # then the setup.py build command above would have passed, but there 465*9c5db199SXin Li # won't actually be a _crcfunext.so module. Check for it here to 466*9c5db199SXin Li # disambiguate other errors from shutil.copy2. 467*9c5db199SXin Li logging.debug('No crcmod module produced (missing host compiler?)') 468*9c5db199SXin Li continue 469*9c5db199SXin Li 470*9c5db199SXin Li def __init__(self, boto_file=None, cache_dir=None, acl=None, 471*9c5db199SXin Li dry_run=False, gsutil_bin=None, init_boto=False, retries=None, 472*9c5db199SXin Li sleep=None, cache_user=None): 473*9c5db199SXin Li """Constructor. 474*9c5db199SXin Li 475*9c5db199SXin Li Args: 476*9c5db199SXin Li boto_file: Fully qualified path to user's .boto credential file. 477*9c5db199SXin Li cache_dir: The absolute path to the cache directory. Use the default 478*9c5db199SXin Li fallback if not given. 479*9c5db199SXin Li acl: If given, a canned ACL. It is not valid to pass in an ACL file 480*9c5db199SXin Li here, because most gsutil commands do not accept ACL files. If you 481*9c5db199SXin Li would like to use an ACL file, use the SetACL command instead. 482*9c5db199SXin Li dry_run: Testing mode that prints commands that would be run. 483*9c5db199SXin Li gsutil_bin: If given, the absolute path to the gsutil binary. Else 484*9c5db199SXin Li the default fallback will be used. 485*9c5db199SXin Li init_boto: If set to True, GSContext will check during __init__ if a 486*9c5db199SXin Li valid boto config is configured, and if not, will attempt to ask the 487*9c5db199SXin Li user to interactively set up the boto config. 488*9c5db199SXin Li retries: Number of times to retry a command before failing. 489*9c5db199SXin Li sleep: Amount of time to sleep between failures. 490*9c5db199SXin Li cache_user: user for creating cache_dir for gsutil. Default is None. 491*9c5db199SXin Li """ 492*9c5db199SXin Li if gsutil_bin is None: 493*9c5db199SXin Li gsutil_bin = self.GetDefaultGSUtilBin(cache_dir, cache_user=cache_user) 494*9c5db199SXin Li else: 495*9c5db199SXin Li self._CheckFile('gsutil not found', gsutil_bin) 496*9c5db199SXin Li self.gsutil_bin = gsutil_bin 497*9c5db199SXin Li 498*9c5db199SXin Li # The version of gsutil is retrieved on demand and cached here. 499*9c5db199SXin Li self._gsutil_version = None 500*9c5db199SXin Li 501*9c5db199SXin Li # Increase the number of retries. With 10 retries, Boto will try a total of 502*9c5db199SXin Li # 11 times and wait up to 2**11 seconds (~30 minutes) in total, not 503*9c5db199SXin Li # not including the time spent actually uploading or downloading. 504*9c5db199SXin Li self.gsutil_flags = ['-o', 'Boto:num_retries=10'] 505*9c5db199SXin Li 506*9c5db199SXin Li # Set HTTP proxy if environment variable http_proxy is set 507*9c5db199SXin Li # (crbug.com/325032). 508*9c5db199SXin Li if 'http_proxy' in os.environ: 509*9c5db199SXin Li url = urllib.parse.urlparse(os.environ['http_proxy']) 510*9c5db199SXin Li if not url.hostname or (not url.username and url.password): 511*9c5db199SXin Li logging.warning('GS_ERROR: Ignoring env variable http_proxy because it ' 512*9c5db199SXin Li 'is not properly set: %s', os.environ['http_proxy']) 513*9c5db199SXin Li else: 514*9c5db199SXin Li self.gsutil_flags += ['-o', 'Boto:proxy=%s' % url.hostname] 515*9c5db199SXin Li if url.username: 516*9c5db199SXin Li self.gsutil_flags += ['-o', 'Boto:proxy_user=%s' % url.username] 517*9c5db199SXin Li if url.password: 518*9c5db199SXin Li self.gsutil_flags += ['-o', 'Boto:proxy_pass=%s' % url.password] 519*9c5db199SXin Li if url.port: 520*9c5db199SXin Li self.gsutil_flags += ['-o', 'Boto:proxy_port=%d' % url.port] 521*9c5db199SXin Li 522*9c5db199SXin Li # Prefer boto_file if specified, else prefer the env then the default. 523*9c5db199SXin Li if boto_file is None: 524*9c5db199SXin Li boto_file = os.environ.get('BOTO_CONFIG') 525*9c5db199SXin Li if boto_file is None and os.path.isfile(self.DEFAULT_BOTO_FILE): 526*9c5db199SXin Li # Only set boto file to DEFAULT_BOTO_FILE if it exists. 527*9c5db199SXin Li boto_file = self.DEFAULT_BOTO_FILE 528*9c5db199SXin Li 529*9c5db199SXin Li self.boto_file = boto_file 530*9c5db199SXin Li 531*9c5db199SXin Li self.acl = acl 532*9c5db199SXin Li 533*9c5db199SXin Li self.dry_run = dry_run 534*9c5db199SXin Li self.retries = self.DEFAULT_RETRIES if retries is None else int(retries) 535*9c5db199SXin Li self._sleep_time = self.DEFAULT_SLEEP_TIME if sleep is None else int(sleep) 536*9c5db199SXin Li 537*9c5db199SXin Li if init_boto and not dry_run: 538*9c5db199SXin Li # We can't really expect gsutil to even be present in dry_run mode. 539*9c5db199SXin Li self._InitBoto() 540*9c5db199SXin Li 541*9c5db199SXin Li @property 542*9c5db199SXin Li def gsutil_version(self): 543*9c5db199SXin Li """Return the version of the gsutil in this context.""" 544*9c5db199SXin Li if not self._gsutil_version: 545*9c5db199SXin Li if self.dry_run: 546*9c5db199SXin Li self._gsutil_version = self.GSUTIL_VERSION 547*9c5db199SXin Li else: 548*9c5db199SXin Li cmd = ['-q', 'version'] 549*9c5db199SXin Li 550*9c5db199SXin Li # gsutil has been known to return version to stderr in the past, so 551*9c5db199SXin Li # use stderr=subprocess.STDOUT. 552*9c5db199SXin Li result = self.DoCommand(cmd, stdout=True, stderr=subprocess.STDOUT) 553*9c5db199SXin Li 554*9c5db199SXin Li # Expect output like: 'gsutil version 3.35' or 'gsutil version: 4.5'. 555*9c5db199SXin Li match = re.search(r'^\s*gsutil\s+version:?\s+([\d.]+)', result.output, 556*9c5db199SXin Li re.IGNORECASE) 557*9c5db199SXin Li if match: 558*9c5db199SXin Li self._gsutil_version = match.group(1) 559*9c5db199SXin Li else: 560*9c5db199SXin Li raise GSContextException('Unexpected output format from "%s":\n%s.' % 561*9c5db199SXin Li (result.cmdstr, result.output)) 562*9c5db199SXin Li 563*9c5db199SXin Li return self._gsutil_version 564*9c5db199SXin Li 565*9c5db199SXin Li def _CheckFile(self, errmsg, afile): 566*9c5db199SXin Li """Pre-flight check for valid inputs. 567*9c5db199SXin Li 568*9c5db199SXin Li Args: 569*9c5db199SXin Li errmsg: Error message to display. 570*9c5db199SXin Li afile: Fully qualified path to test file existance. 571*9c5db199SXin Li """ 572*9c5db199SXin Li if not os.path.isfile(afile): 573*9c5db199SXin Li raise GSContextException('%s, %s is not a file' % (errmsg, afile)) 574*9c5db199SXin Li 575*9c5db199SXin Li def _TestGSLs(self): 576*9c5db199SXin Li """Quick test of gsutil functionality.""" 577*9c5db199SXin Li # The bucket in question is readable by any authenticated account. 578*9c5db199SXin Li # If we can list it's contents, we have valid authentication. 579*9c5db199SXin Li cmd = ['ls', AUTHENTICATION_BUCKET] 580*9c5db199SXin Li result = self.DoCommand(cmd, retries=0, debug_level=logging.DEBUG, 581*9c5db199SXin Li stderr=True, check=False) 582*9c5db199SXin Li 583*9c5db199SXin Li # Did we fail with an authentication error? 584*9c5db199SXin Li if (result.returncode == 1 and 585*9c5db199SXin Li any(e in result.error for e in self.AUTHORIZATION_ERRORS)): 586*9c5db199SXin Li logging.warning('gsutil authentication failure msg: %s', result.error) 587*9c5db199SXin Li return False 588*9c5db199SXin Li 589*9c5db199SXin Li return True 590*9c5db199SXin Li 591*9c5db199SXin Li def _ConfigureBotoConfig(self): 592*9c5db199SXin Li """Make sure we can access protected bits in GS.""" 593*9c5db199SXin Li print('Configuring gsutil. **Please use your @google.com account.**') 594*9c5db199SXin Li try: 595*9c5db199SXin Li if not self.boto_file: 596*9c5db199SXin Li self.boto_file = self.DEFAULT_BOTO_FILE 597*9c5db199SXin Li self.DoCommand(['config'], retries=0, debug_level=logging.CRITICAL, 598*9c5db199SXin Li print_cmd=False) 599*9c5db199SXin Li finally: 600*9c5db199SXin Li if (os.path.exists(self.boto_file) and not 601*9c5db199SXin Li os.path.getsize(self.boto_file)): 602*9c5db199SXin Li os.remove(self.boto_file) 603*9c5db199SXin Li raise GSContextException('GS config could not be set up.') 604*9c5db199SXin Li 605*9c5db199SXin Li def _InitBoto(self): 606*9c5db199SXin Li if not self._TestGSLs(): 607*9c5db199SXin Li self._ConfigureBotoConfig() 608*9c5db199SXin Li 609*9c5db199SXin Li def Cat(self, path, **kwargs): 610*9c5db199SXin Li """Returns the contents of a GS object.""" 611*9c5db199SXin Li kwargs.setdefault('stdout', True) 612*9c5db199SXin Li encoding = kwargs.setdefault('encoding', None) 613*9c5db199SXin Li errors = kwargs.setdefault('errors', None) 614*9c5db199SXin Li if not PathIsGs(path): 615*9c5db199SXin Li # gsutil doesn't support cat-ting a local path, so read it ourselves. 616*9c5db199SXin Li mode = 'rb' if encoding is None else 'r' 617*9c5db199SXin Li try: 618*9c5db199SXin Li return osutils.ReadFile(path, mode=mode, encoding=encoding, 619*9c5db199SXin Li errors=errors) 620*9c5db199SXin Li except Exception as e: 621*9c5db199SXin Li if getattr(e, 'errno', None) == errno.ENOENT: 622*9c5db199SXin Li raise GSNoSuchKey('Cat Error: file %s does not exist' % path) 623*9c5db199SXin Li else: 624*9c5db199SXin Li raise GSContextException(str(e)) 625*9c5db199SXin Li elif self.dry_run: 626*9c5db199SXin Li return b'' if encoding is None else '' 627*9c5db199SXin Li else: 628*9c5db199SXin Li return self.DoCommand(['cat', path], **kwargs).output 629*9c5db199SXin Li 630*9c5db199SXin Li def StreamingCat(self, path, chunksize=0x100000): 631*9c5db199SXin Li """Returns the content of a GS file as a stream. 632*9c5db199SXin Li 633*9c5db199SXin Li Unlike Cat or Copy, this function doesn't support any internal retry or 634*9c5db199SXin Li validation by computing checksum of downloaded data. Users should perform 635*9c5db199SXin Li their own validation, or use Cat() instead. 636*9c5db199SXin Li 637*9c5db199SXin Li Args: 638*9c5db199SXin Li path: Full gs:// path of the src file. 639*9c5db199SXin Li chunksize: At most how much data read from upstream and yield to callers 640*9c5db199SXin Li at a time. The default value is 1 MB. 641*9c5db199SXin Li 642*9c5db199SXin Li Yields: 643*9c5db199SXin Li The file content, chunk by chunk, as bytes. 644*9c5db199SXin Li """ 645*9c5db199SXin Li assert PathIsGs(path) 646*9c5db199SXin Li 647*9c5db199SXin Li if self.dry_run: 648*9c5db199SXin Li return (lambda: (yield ''))() 649*9c5db199SXin Li 650*9c5db199SXin Li cmd = [self.gsutil_bin] + self.gsutil_flags + ['cat', path] 651*9c5db199SXin Li proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) 652*9c5db199SXin Li 653*9c5db199SXin Li def read_content(): 654*9c5db199SXin Li try: 655*9c5db199SXin Li while True: 656*9c5db199SXin Li data = proc.stdout.read(chunksize) 657*9c5db199SXin Li if not data and proc.poll() is not None: 658*9c5db199SXin Li break 659*9c5db199SXin Li if data: 660*9c5db199SXin Li yield data 661*9c5db199SXin Li 662*9c5db199SXin Li rc = proc.poll() 663*9c5db199SXin Li if rc: 664*9c5db199SXin Li raise GSCommandError( 665*9c5db199SXin Li 'Cannot stream cat %s from Google Storage!' % path, rc, None) 666*9c5db199SXin Li finally: 667*9c5db199SXin Li if proc.returncode is None: 668*9c5db199SXin Li proc.stdout.close() 669*9c5db199SXin Li proc.terminate() 670*9c5db199SXin Li 671*9c5db199SXin Li return read_content() 672*9c5db199SXin Li 673*9c5db199SXin Li def CopyInto(self, local_path, remote_dir, filename=None, **kwargs): 674*9c5db199SXin Li """Upload a local file into a directory in google storage. 675*9c5db199SXin Li 676*9c5db199SXin Li Args: 677*9c5db199SXin Li local_path: Local file path to copy. 678*9c5db199SXin Li remote_dir: Full gs:// url of the directory to transfer the file into. 679*9c5db199SXin Li filename: If given, the filename to place the content at; if not given, 680*9c5db199SXin Li it's discerned from basename(local_path). 681*9c5db199SXin Li **kwargs: See Copy() for documentation. 682*9c5db199SXin Li 683*9c5db199SXin Li Returns: 684*9c5db199SXin Li The generation of the remote file. 685*9c5db199SXin Li """ 686*9c5db199SXin Li filename = filename if filename is not None else local_path 687*9c5db199SXin Li # Basename it even if an explicit filename was given; we don't want 688*9c5db199SXin Li # people using filename as a multi-directory path fragment. 689*9c5db199SXin Li return self.Copy(local_path, 690*9c5db199SXin Li '%s/%s' % (remote_dir, os.path.basename(filename)), 691*9c5db199SXin Li **kwargs) 692*9c5db199SXin Li 693*9c5db199SXin Li @staticmethod 694*9c5db199SXin Li def GetTrackerFilenames(dest_path): 695*9c5db199SXin Li """Returns a list of gsutil tracker filenames. 696*9c5db199SXin Li 697*9c5db199SXin Li Tracker files are used by gsutil to resume downloads/uploads. This 698*9c5db199SXin Li function does not handle parallel uploads. 699*9c5db199SXin Li 700*9c5db199SXin Li Args: 701*9c5db199SXin Li dest_path: Either a GS path or an absolute local path. 702*9c5db199SXin Li 703*9c5db199SXin Li Returns: 704*9c5db199SXin Li The list of potential tracker filenames. 705*9c5db199SXin Li """ 706*9c5db199SXin Li dest = urllib.parse.urlsplit(dest_path) 707*9c5db199SXin Li filenames = [] 708*9c5db199SXin Li if dest.scheme == 'gs': 709*9c5db199SXin Li prefix = 'upload' 710*9c5db199SXin Li bucket_name = dest.netloc 711*9c5db199SXin Li object_name = dest.path.lstrip('/') 712*9c5db199SXin Li filenames.append( 713*9c5db199SXin Li re.sub(r'[/\\]', '_', 'resumable_upload__%s__%s__%s.url' % 714*9c5db199SXin Li (bucket_name, object_name, GSContext.GSUTIL_API_SELECTOR))) 715*9c5db199SXin Li else: 716*9c5db199SXin Li prefix = 'download' 717*9c5db199SXin Li filenames.append( 718*9c5db199SXin Li re.sub(r'[/\\]', '_', 'resumable_download__%s__%s.etag' % 719*9c5db199SXin Li (dest.path, GSContext.GSUTIL_API_SELECTOR))) 720*9c5db199SXin Li 721*9c5db199SXin Li hashed_filenames = [] 722*9c5db199SXin Li for filename in filenames: 723*9c5db199SXin Li m = hashlib.sha1(filename.encode()) 724*9c5db199SXin Li hashed_filenames.append('%s_TRACKER_%s.%s' % 725*9c5db199SXin Li (prefix, m.hexdigest(), filename[-16:])) 726*9c5db199SXin Li 727*9c5db199SXin Li return hashed_filenames 728*9c5db199SXin Li 729*9c5db199SXin Li def _RetryFilter(self, e): 730*9c5db199SXin Li """Returns whether to retry RunCommandError exception |e|. 731*9c5db199SXin Li 732*9c5db199SXin Li Args: 733*9c5db199SXin Li e: Exception object to filter. Exception may be re-raised as 734*9c5db199SXin Li as different type, if _RetryFilter determines a more appropriate 735*9c5db199SXin Li exception type based on the contents of |e|. 736*9c5db199SXin Li """ 737*9c5db199SXin Li error_details = self._MatchKnownError(e) 738*9c5db199SXin Li if error_details.exception: 739*9c5db199SXin Li raise error_details.exception 740*9c5db199SXin Li return error_details.retriable 741*9c5db199SXin Li 742*9c5db199SXin Li def _MatchKnownError(self, e): 743*9c5db199SXin Li """Function to match known RunCommandError exceptions. 744*9c5db199SXin Li 745*9c5db199SXin Li Args: 746*9c5db199SXin Li e: Exception object to filter. 747*9c5db199SXin Li 748*9c5db199SXin Li Returns: 749*9c5db199SXin Li An ErrorDetails instance with details about the message pattern found. 750*9c5db199SXin Li """ 751*9c5db199SXin Li if not retry_util.ShouldRetryCommandCommon(e): 752*9c5db199SXin Li if not isinstance(e, cros_build_lib.RunCommandError): 753*9c5db199SXin Li error_type = 'unknown' 754*9c5db199SXin Li else: 755*9c5db199SXin Li error_type = 'failed_to_launch' 756*9c5db199SXin Li return ErrorDetails(type=error_type, retriable=False) 757*9c5db199SXin Li 758*9c5db199SXin Li # e is guaranteed by above filter to be a RunCommandError 759*9c5db199SXin Li if e.result.returncode < 0: 760*9c5db199SXin Li sig_name = signals.StrSignal(-e.result.returncode) 761*9c5db199SXin Li logging.info('Child process received signal %d; not retrying.', sig_name) 762*9c5db199SXin Li return ErrorDetails(type='received_signal', message_pattern=sig_name, 763*9c5db199SXin Li retriable=False) 764*9c5db199SXin Li 765*9c5db199SXin Li error = e.result.error 766*9c5db199SXin Li if error: 767*9c5db199SXin Li # Since the captured error will use the encoding the user requested, 768*9c5db199SXin Li # normalize to bytes for testing below. 769*9c5db199SXin Li if isinstance(error, six.text_type): 770*9c5db199SXin Li error = error.encode('utf-8') 771*9c5db199SXin Li 772*9c5db199SXin Li # gsutil usually prints PreconditionException when a precondition fails. 773*9c5db199SXin Li # It may also print "ResumableUploadAbortException: 412 Precondition 774*9c5db199SXin Li # Failed", so the logic needs to be a little more general. 775*9c5db199SXin Li if (b'PreconditionException' in error or 776*9c5db199SXin Li b'412 Precondition Failed' in error): 777*9c5db199SXin Li return ErrorDetails(type='precondition_exception', retriable=False, 778*9c5db199SXin Li exception=GSContextPreconditionFailed(e)) 779*9c5db199SXin Li 780*9c5db199SXin Li # If the file does not exist, one of the following errors occurs. The 781*9c5db199SXin Li # "stat" command leaves off the "CommandException: " prefix, but it also 782*9c5db199SXin Li # outputs to stdout instead of stderr and so will not be caught here 783*9c5db199SXin Li # regardless. 784*9c5db199SXin Li if (b'CommandException: No URLs matched' in error or 785*9c5db199SXin Li b'NotFoundException:' in error or 786*9c5db199SXin Li b'One or more URLs matched no objects' in error): 787*9c5db199SXin Li return ErrorDetails(type='no_such_key', retriable=False, 788*9c5db199SXin Li exception=GSNoSuchKey(e)) 789*9c5db199SXin Li 790*9c5db199SXin Li logging.warning('GS_ERROR: %s ', error) 791*9c5db199SXin Li 792*9c5db199SXin Li # Temporary fix: remove the gsutil tracker files so that our retry 793*9c5db199SXin Li # can hit a different backend. This should be removed after the 794*9c5db199SXin Li # bug is fixed by the Google Storage team (see crbug.com/308300). 795*9c5db199SXin Li resumable_error = _FirstSubstring(error, self.RESUMABLE_ERROR_MESSAGE) 796*9c5db199SXin Li if resumable_error: 797*9c5db199SXin Li # Only remove the tracker files if we try to upload/download a file. 798*9c5db199SXin Li if 'cp' in e.result.cmd[:-2]: 799*9c5db199SXin Li # Assume a command: gsutil [options] cp [options] src_path dest_path 800*9c5db199SXin Li # dest_path needs to be a fully qualified local path, which is already 801*9c5db199SXin Li # required for GSContext.Copy(). 802*9c5db199SXin Li tracker_filenames = self.GetTrackerFilenames(e.result.cmd[-1]) 803*9c5db199SXin Li logging.info('Potential list of tracker files: %s', 804*9c5db199SXin Li tracker_filenames) 805*9c5db199SXin Li for tracker_filename in tracker_filenames: 806*9c5db199SXin Li tracker_file_path = os.path.join(self.DEFAULT_GSUTIL_TRACKER_DIR, 807*9c5db199SXin Li tracker_filename) 808*9c5db199SXin Li if os.path.exists(tracker_file_path): 809*9c5db199SXin Li logging.info('Deleting gsutil tracker file %s before retrying.', 810*9c5db199SXin Li tracker_file_path) 811*9c5db199SXin Li logging.info('The content of the tracker file: %s', 812*9c5db199SXin Li osutils.ReadFile(tracker_file_path)) 813*9c5db199SXin Li osutils.SafeUnlink(tracker_file_path) 814*9c5db199SXin Li return ErrorDetails(type='resumable', 815*9c5db199SXin Li message_pattern=resumable_error.decode('utf-8'), 816*9c5db199SXin Li retriable=True) 817*9c5db199SXin Li 818*9c5db199SXin Li transient_error = _FirstSubstring(error, self.TRANSIENT_ERROR_MESSAGE) 819*9c5db199SXin Li if transient_error: 820*9c5db199SXin Li return ErrorDetails(type='transient', 821*9c5db199SXin Li message_pattern=transient_error.decode('utf-8'), 822*9c5db199SXin Li retriable=True) 823*9c5db199SXin Li 824*9c5db199SXin Li return ErrorDetails(type='unknown', retriable=False) 825*9c5db199SXin Li 826*9c5db199SXin Li # TODO(mtennant): Make a private method. 827*9c5db199SXin Li def DoCommand(self, gsutil_cmd, headers=(), retries=None, version=None, 828*9c5db199SXin Li parallel=False, **kwargs): 829*9c5db199SXin Li """Run a gsutil command, suppressing output, and setting retry/sleep. 830*9c5db199SXin Li 831*9c5db199SXin Li Args: 832*9c5db199SXin Li gsutil_cmd: The (mostly) constructed gsutil subcommand to run. 833*9c5db199SXin Li headers: A list of raw headers to pass down. 834*9c5db199SXin Li parallel: Whether gsutil should enable parallel copy/update of multiple 835*9c5db199SXin Li files. NOTE: This option causes gsutil to use significantly more 836*9c5db199SXin Li memory, even if gsutil is only uploading one file. 837*9c5db199SXin Li retries: How many times to retry this command (defaults to setting given 838*9c5db199SXin Li at object creation). 839*9c5db199SXin Li version: If given, the generation; essentially the timestamp of the last 840*9c5db199SXin Li update. Note this is not the same as sequence-number; it's 841*9c5db199SXin Li monotonically increasing bucket wide rather than reset per file. 842*9c5db199SXin Li The usage of this is if we intend to replace/update only if the version 843*9c5db199SXin Li is what we expect. This is useful for distributed reasons- for example, 844*9c5db199SXin Li to ensure you don't overwrite someone else's creation, a version of 845*9c5db199SXin Li 0 states "only update if no version exists". 846*9c5db199SXin Li 847*9c5db199SXin Li Returns: 848*9c5db199SXin Li A RunCommandResult object. 849*9c5db199SXin Li """ 850*9c5db199SXin Li kwargs = kwargs.copy() 851*9c5db199SXin Li kwargs.setdefault('stderr', True) 852*9c5db199SXin Li kwargs.setdefault('encoding', 'utf-8') 853*9c5db199SXin Li 854*9c5db199SXin Li cmd = [self.gsutil_bin] 855*9c5db199SXin Li cmd += self.gsutil_flags 856*9c5db199SXin Li for header in headers: 857*9c5db199SXin Li cmd += ['-h', header] 858*9c5db199SXin Li if version is not None: 859*9c5db199SXin Li cmd += ['-h', 'x-goog-if-generation-match:%d' % int(version)] 860*9c5db199SXin Li 861*9c5db199SXin Li # Enable parallel copy/update of multiple files if stdin is not to 862*9c5db199SXin Li # be piped to the command. This does not split a single file into 863*9c5db199SXin Li # smaller components for upload. 864*9c5db199SXin Li if parallel and kwargs.get('input') is None: 865*9c5db199SXin Li cmd += ['-m'] 866*9c5db199SXin Li 867*9c5db199SXin Li cmd.extend(gsutil_cmd) 868*9c5db199SXin Li 869*9c5db199SXin Li if retries is None: 870*9c5db199SXin Li retries = self.retries 871*9c5db199SXin Li 872*9c5db199SXin Li extra_env = kwargs.pop('extra_env', {}) 873*9c5db199SXin Li if self.boto_file and os.path.isfile(self.boto_file): 874*9c5db199SXin Li extra_env.setdefault('BOTO_CONFIG', self.boto_file) 875*9c5db199SXin Li 876*9c5db199SXin Li if self.dry_run: 877*9c5db199SXin Li logging.debug("%s: would've run: %s", self.__class__.__name__, 878*9c5db199SXin Li cros_build_lib.CmdToStr(cmd)) 879*9c5db199SXin Li else: 880*9c5db199SXin Li try: 881*9c5db199SXin Li return retry_stats.RetryWithStats(retry_stats.GSUTIL, 882*9c5db199SXin Li self._RetryFilter, 883*9c5db199SXin Li retries, cros_build_lib.run, 884*9c5db199SXin Li cmd, sleep=self._sleep_time, 885*9c5db199SXin Li extra_env=extra_env, **kwargs) 886*9c5db199SXin Li except cros_build_lib.RunCommandError as e: 887*9c5db199SXin Li raise GSCommandError(e.msg, e.result, e.exception) 888*9c5db199SXin Li 889*9c5db199SXin Li def Copy(self, src_path, dest_path, acl=None, recursive=False, 890*9c5db199SXin Li skip_symlinks=True, auto_compress=False, **kwargs): 891*9c5db199SXin Li """Copy to/from GS bucket. 892*9c5db199SXin Li 893*9c5db199SXin Li Canned ACL permissions can be specified on the gsutil cp command line. 894*9c5db199SXin Li 895*9c5db199SXin Li More info: 896*9c5db199SXin Li https://developers.google.com/storage/docs/accesscontrol#applyacls 897*9c5db199SXin Li 898*9c5db199SXin Li Args: 899*9c5db199SXin Li src_path: Fully qualified local path or full gs:// path of the src file. 900*9c5db199SXin Li dest_path: Fully qualified local path or full gs:// path of the dest 901*9c5db199SXin Li file. 902*9c5db199SXin Li acl: One of the google storage canned_acls to apply. 903*9c5db199SXin Li recursive: Whether to copy recursively. 904*9c5db199SXin Li skip_symlinks: Skip symbolic links when copying recursively. 905*9c5db199SXin Li auto_compress: Automatically compress with gzip when uploading. 906*9c5db199SXin Li 907*9c5db199SXin Li Returns: 908*9c5db199SXin Li The generation of the remote file. 909*9c5db199SXin Li 910*9c5db199SXin Li Raises: 911*9c5db199SXin Li RunCommandError if the command failed despite retries. 912*9c5db199SXin Li """ 913*9c5db199SXin Li # -v causes gs://bucket/path#generation to be listed in output. 914*9c5db199SXin Li cmd = ['cp', '-v'] 915*9c5db199SXin Li 916*9c5db199SXin Li # Certain versions of gsutil (at least 4.3) assume the source of a copy is 917*9c5db199SXin Li # a directory if the -r option is used. If it's really a file, gsutil will 918*9c5db199SXin Li # look like it's uploading it but not actually do anything. We'll work 919*9c5db199SXin Li # around that problem by surpressing the -r flag if we detect the source 920*9c5db199SXin Li # is a local file. 921*9c5db199SXin Li if recursive and not os.path.isfile(src_path): 922*9c5db199SXin Li cmd.append('-r') 923*9c5db199SXin Li if skip_symlinks: 924*9c5db199SXin Li cmd.append('-e') 925*9c5db199SXin Li 926*9c5db199SXin Li if auto_compress: 927*9c5db199SXin Li cmd.append('-Z') 928*9c5db199SXin Li 929*9c5db199SXin Li acl = self.acl if acl is None else acl 930*9c5db199SXin Li if acl is not None: 931*9c5db199SXin Li cmd += ['-a', acl] 932*9c5db199SXin Li 933*9c5db199SXin Li with cros_build_lib.ContextManagerStack() as stack: 934*9c5db199SXin Li # Write the input into a tempfile if possible. This is needed so that 935*9c5db199SXin Li # gsutil can retry failed requests. We allow the input to be a string 936*9c5db199SXin Li # or bytes regardless of the output encoding. 937*9c5db199SXin Li if src_path == '-' and kwargs.get('input') is not None: 938*9c5db199SXin Li f = stack.Add(tempfile.NamedTemporaryFile, mode='wb') 939*9c5db199SXin Li data = kwargs['input'] 940*9c5db199SXin Li if isinstance(data, six.text_type): 941*9c5db199SXin Li data = data.encode('utf-8') 942*9c5db199SXin Li f.write(data) 943*9c5db199SXin Li f.flush() 944*9c5db199SXin Li del kwargs['input'] 945*9c5db199SXin Li src_path = f.name 946*9c5db199SXin Li 947*9c5db199SXin Li cmd += ['--', src_path, dest_path] 948*9c5db199SXin Li 949*9c5db199SXin Li if not (PathIsGs(src_path) or PathIsGs(dest_path)): 950*9c5db199SXin Li # Don't retry on local copies. 951*9c5db199SXin Li kwargs.setdefault('retries', 0) 952*9c5db199SXin Li 953*9c5db199SXin Li kwargs['capture_output'] = True 954*9c5db199SXin Li try: 955*9c5db199SXin Li result = self.DoCommand(cmd, **kwargs) 956*9c5db199SXin Li if self.dry_run: 957*9c5db199SXin Li return None 958*9c5db199SXin Li 959*9c5db199SXin Li # Now we parse the output for the current generation number. Example: 960*9c5db199SXin Li # Created: gs://chromeos-throw-away-bucket/foo#1360630664537000.1 961*9c5db199SXin Li m = re.search(r'Created: .*#(\d+)([.](\d+))?\n', result.error) 962*9c5db199SXin Li if m: 963*9c5db199SXin Li return int(m.group(1)) 964*9c5db199SXin Li else: 965*9c5db199SXin Li return None 966*9c5db199SXin Li except GSNoSuchKey as e: 967*9c5db199SXin Li # If the source was a local file, the error is a quirk of gsutil 4.5 968*9c5db199SXin Li # and should be ignored. If the source was remote, there might 969*9c5db199SXin Li # legitimately be no such file. See crbug.com/393419. 970*9c5db199SXin Li if os.path.isfile(src_path): 971*9c5db199SXin Li return None 972*9c5db199SXin Li 973*9c5db199SXin Li # Temp log for crbug.com/642986, should be removed when the bug 974*9c5db199SXin Li # is fixed. 975*9c5db199SXin Li logging.warning('Copy Error: src %s dest %s: %s ' 976*9c5db199SXin Li '(Temp log for crbug.com/642986)', 977*9c5db199SXin Li src_path, dest_path, e) 978*9c5db199SXin Li raise 979*9c5db199SXin Li 980*9c5db199SXin Li def CreateWithContents(self, gs_uri, contents, **kwargs): 981*9c5db199SXin Li """Creates the specified file with specified contents. 982*9c5db199SXin Li 983*9c5db199SXin Li Args: 984*9c5db199SXin Li gs_uri: The URI of a file on Google Storage. 985*9c5db199SXin Li contents: String or bytes with contents to write to the file. 986*9c5db199SXin Li kwargs: See additional options that Copy takes. 987*9c5db199SXin Li 988*9c5db199SXin Li Raises: 989*9c5db199SXin Li See Copy. 990*9c5db199SXin Li """ 991*9c5db199SXin Li self.Copy('-', gs_uri, input=contents, **kwargs) 992*9c5db199SXin Li 993*9c5db199SXin Li # TODO: Merge LS() and List()? 994*9c5db199SXin Li def LS(self, path, **kwargs): 995*9c5db199SXin Li """Does a directory listing of the given gs path. 996*9c5db199SXin Li 997*9c5db199SXin Li Args: 998*9c5db199SXin Li path: The path to get a listing of. 999*9c5db199SXin Li kwargs: See options that DoCommand takes. 1000*9c5db199SXin Li 1001*9c5db199SXin Li Returns: 1002*9c5db199SXin Li A list of paths that matched |path|. Might be more than one if a 1003*9c5db199SXin Li directory or path include wildcards/etc... 1004*9c5db199SXin Li """ 1005*9c5db199SXin Li if self.dry_run: 1006*9c5db199SXin Li return [] 1007*9c5db199SXin Li 1008*9c5db199SXin Li if not PathIsGs(path): 1009*9c5db199SXin Li # gsutil doesn't support listing a local path, so just run 'ls'. 1010*9c5db199SXin Li kwargs.pop('retries', None) 1011*9c5db199SXin Li kwargs.pop('headers', None) 1012*9c5db199SXin Li kwargs['capture_output'] = True 1013*9c5db199SXin Li kwargs.setdefault('encoding', 'utf-8') 1014*9c5db199SXin Li result = cros_build_lib.run(['ls', path], **kwargs) 1015*9c5db199SXin Li return result.output.splitlines() 1016*9c5db199SXin Li else: 1017*9c5db199SXin Li return [x.url for x in self.List(path, **kwargs)] 1018*9c5db199SXin Li 1019*9c5db199SXin Li def List(self, path, details=False, **kwargs): 1020*9c5db199SXin Li """Does a directory listing of the given gs path. 1021*9c5db199SXin Li 1022*9c5db199SXin Li Args: 1023*9c5db199SXin Li path: The path to get a listing of. 1024*9c5db199SXin Li details: Whether to include size/timestamp info. 1025*9c5db199SXin Li kwargs: See options that DoCommand takes. 1026*9c5db199SXin Li 1027*9c5db199SXin Li Returns: 1028*9c5db199SXin Li A list of GSListResult objects that matched |path|. Might be more 1029*9c5db199SXin Li than one if a directory or path include wildcards/etc... 1030*9c5db199SXin Li """ 1031*9c5db199SXin Li ret = [] 1032*9c5db199SXin Li if self.dry_run: 1033*9c5db199SXin Li return ret 1034*9c5db199SXin Li 1035*9c5db199SXin Li cmd = ['ls'] 1036*9c5db199SXin Li if details: 1037*9c5db199SXin Li cmd += ['-l'] 1038*9c5db199SXin Li cmd += ['--', path] 1039*9c5db199SXin Li 1040*9c5db199SXin Li # We always request the extended details as the overhead compared to a plain 1041*9c5db199SXin Li # listing is negligible. 1042*9c5db199SXin Li kwargs['stdout'] = True 1043*9c5db199SXin Li lines = self.DoCommand(cmd, **kwargs).output.splitlines() 1044*9c5db199SXin Li 1045*9c5db199SXin Li if details: 1046*9c5db199SXin Li # The last line is expected to be a summary line. Ignore it. 1047*9c5db199SXin Li lines = lines[:-1] 1048*9c5db199SXin Li ls_re = LS_LA_RE 1049*9c5db199SXin Li else: 1050*9c5db199SXin Li ls_re = LS_RE 1051*9c5db199SXin Li 1052*9c5db199SXin Li # Handle optional fields. 1053*9c5db199SXin Li intify = lambda x: int(x) if x else None 1054*9c5db199SXin Li 1055*9c5db199SXin Li # Parse out each result and build up the results list. 1056*9c5db199SXin Li for line in lines: 1057*9c5db199SXin Li match = ls_re.search(line) 1058*9c5db199SXin Li if not match: 1059*9c5db199SXin Li raise GSContextException('unable to parse line: %s' % line) 1060*9c5db199SXin Li if match.group('creation_time'): 1061*9c5db199SXin Li timestamp = datetime.datetime.strptime(match.group('creation_time'), 1062*9c5db199SXin Li DATETIME_FORMAT) 1063*9c5db199SXin Li else: 1064*9c5db199SXin Li timestamp = None 1065*9c5db199SXin Li 1066*9c5db199SXin Li ret.append(GSListResult( 1067*9c5db199SXin Li content_length=intify(match.group('content_length')), 1068*9c5db199SXin Li creation_time=timestamp, 1069*9c5db199SXin Li url=match.group('url'), 1070*9c5db199SXin Li generation=intify(match.group('generation')), 1071*9c5db199SXin Li metageneration=intify(match.group('metageneration')))) 1072*9c5db199SXin Li 1073*9c5db199SXin Li return ret 1074*9c5db199SXin Li 1075*9c5db199SXin Li def GetSize(self, path, **kwargs): 1076*9c5db199SXin Li """Returns size of a single object (local or GS).""" 1077*9c5db199SXin Li if not PathIsGs(path): 1078*9c5db199SXin Li return os.path.getsize(path) 1079*9c5db199SXin Li else: 1080*9c5db199SXin Li return self.Stat(path, **kwargs).content_length 1081*9c5db199SXin Li 1082*9c5db199SXin Li def Move(self, src_path, dest_path, **kwargs): 1083*9c5db199SXin Li """Move/rename to/from GS bucket. 1084*9c5db199SXin Li 1085*9c5db199SXin Li Args: 1086*9c5db199SXin Li src_path: Fully qualified local path or full gs:// path of the src file. 1087*9c5db199SXin Li dest_path: Fully qualified local path or full gs:// path of the dest file. 1088*9c5db199SXin Li kwargs: See options that DoCommand takes. 1089*9c5db199SXin Li """ 1090*9c5db199SXin Li cmd = ['mv', '--', src_path, dest_path] 1091*9c5db199SXin Li return self.DoCommand(cmd, **kwargs) 1092*9c5db199SXin Li 1093*9c5db199SXin Li def SetACL(self, upload_url, acl=None, **kwargs): 1094*9c5db199SXin Li """Set access on a file already in google storage. 1095*9c5db199SXin Li 1096*9c5db199SXin Li Args: 1097*9c5db199SXin Li upload_url: gs:// url that will have acl applied to it. 1098*9c5db199SXin Li acl: An ACL permissions file or canned ACL. 1099*9c5db199SXin Li kwargs: See options that DoCommand takes. 1100*9c5db199SXin Li """ 1101*9c5db199SXin Li if acl is None: 1102*9c5db199SXin Li if not self.acl: 1103*9c5db199SXin Li raise GSContextException( 1104*9c5db199SXin Li 'SetAcl invoked w/out a specified acl, nor a default acl.') 1105*9c5db199SXin Li acl = self.acl 1106*9c5db199SXin Li 1107*9c5db199SXin Li self.DoCommand(['acl', 'set', acl, upload_url], **kwargs) 1108*9c5db199SXin Li 1109*9c5db199SXin Li def ChangeACL(self, upload_url, acl_args_file=None, acl_args=None, **kwargs): 1110*9c5db199SXin Li """Change access on a file already in google storage with "acl ch". 1111*9c5db199SXin Li 1112*9c5db199SXin Li Args: 1113*9c5db199SXin Li upload_url: gs:// url that will have acl applied to it. 1114*9c5db199SXin Li acl_args_file: A file with arguments to the gsutil acl ch command. The 1115*9c5db199SXin Li arguments can be spread across multiple lines. Comments 1116*9c5db199SXin Li start with a # character and extend to the end of the 1117*9c5db199SXin Li line. Exactly one of this argument or acl_args must be 1118*9c5db199SXin Li set. 1119*9c5db199SXin Li acl_args: A list of arguments for the gsutil acl ch command. Exactly 1120*9c5db199SXin Li one of this argument or acl_args must be set. 1121*9c5db199SXin Li kwargs: See options that DoCommand takes. 1122*9c5db199SXin Li """ 1123*9c5db199SXin Li if acl_args_file and acl_args: 1124*9c5db199SXin Li raise GSContextException( 1125*9c5db199SXin Li 'ChangeACL invoked with both acl_args and acl_args set.') 1126*9c5db199SXin Li if not acl_args_file and not acl_args: 1127*9c5db199SXin Li raise GSContextException( 1128*9c5db199SXin Li 'ChangeACL invoked with neither acl_args nor acl_args set.') 1129*9c5db199SXin Li 1130*9c5db199SXin Li if acl_args_file: 1131*9c5db199SXin Li lines = osutils.ReadFile(acl_args_file).splitlines() 1132*9c5db199SXin Li # Strip out comments. 1133*9c5db199SXin Li lines = [x.split('#', 1)[0].strip() for x in lines] 1134*9c5db199SXin Li acl_args = ' '.join([x for x in lines if x]).split() 1135*9c5db199SXin Li 1136*9c5db199SXin Li # Some versions of gsutil bubble up precondition failures even when we 1137*9c5db199SXin Li # didn't request it due to how ACL changes happen internally to gsutil. 1138*9c5db199SXin Li # https://crbug.com/763450 1139*9c5db199SXin Li # We keep the retry limit a bit low because DoCommand already has its 1140*9c5db199SXin Li # own level of retries. 1141*9c5db199SXin Li retry_util.RetryException( 1142*9c5db199SXin Li GSContextPreconditionFailed, 3, self.DoCommand, 1143*9c5db199SXin Li ['acl', 'ch'] + acl_args + [upload_url], **kwargs) 1144*9c5db199SXin Li 1145*9c5db199SXin Li def Exists(self, path, **kwargs): 1146*9c5db199SXin Li """Checks whether the given object exists. 1147*9c5db199SXin Li 1148*9c5db199SXin Li Args: 1149*9c5db199SXin Li path: Local path or gs:// url to check. 1150*9c5db199SXin Li kwargs: Flags to pass to DoCommand. 1151*9c5db199SXin Li 1152*9c5db199SXin Li Returns: 1153*9c5db199SXin Li True if the path exists; otherwise returns False. 1154*9c5db199SXin Li """ 1155*9c5db199SXin Li if not PathIsGs(path): 1156*9c5db199SXin Li return os.path.exists(path) 1157*9c5db199SXin Li 1158*9c5db199SXin Li try: 1159*9c5db199SXin Li self.Stat(path, **kwargs) 1160*9c5db199SXin Li except GSNoSuchKey: 1161*9c5db199SXin Li return False 1162*9c5db199SXin Li 1163*9c5db199SXin Li return True 1164*9c5db199SXin Li 1165*9c5db199SXin Li def Remove(self, path, recursive=False, ignore_missing=False, **kwargs): 1166*9c5db199SXin Li """Remove the specified file. 1167*9c5db199SXin Li 1168*9c5db199SXin Li Args: 1169*9c5db199SXin Li path: Full gs:// url of the file to delete. 1170*9c5db199SXin Li recursive: Remove recursively starting at path. 1171*9c5db199SXin Li ignore_missing: Whether to suppress errors about missing files. 1172*9c5db199SXin Li kwargs: Flags to pass to DoCommand. 1173*9c5db199SXin Li """ 1174*9c5db199SXin Li cmd = ['rm'] 1175*9c5db199SXin Li if 'recurse' in kwargs: 1176*9c5db199SXin Li raise TypeError('"recurse" has been renamed to "recursive"') 1177*9c5db199SXin Li if recursive: 1178*9c5db199SXin Li cmd.append('-R') 1179*9c5db199SXin Li cmd.append('--') 1180*9c5db199SXin Li cmd.append(path) 1181*9c5db199SXin Li try: 1182*9c5db199SXin Li self.DoCommand(cmd, **kwargs) 1183*9c5db199SXin Li except GSNoSuchKey: 1184*9c5db199SXin Li if not ignore_missing: 1185*9c5db199SXin Li raise 1186*9c5db199SXin Li 1187*9c5db199SXin Li def GetGeneration(self, path): 1188*9c5db199SXin Li """Get the generation and metageneration of the given |path|. 1189*9c5db199SXin Li 1190*9c5db199SXin Li Returns: 1191*9c5db199SXin Li A tuple of the generation and metageneration. 1192*9c5db199SXin Li """ 1193*9c5db199SXin Li try: 1194*9c5db199SXin Li res = self.Stat(path) 1195*9c5db199SXin Li except GSNoSuchKey: 1196*9c5db199SXin Li return 0, 0 1197*9c5db199SXin Li 1198*9c5db199SXin Li return res.generation, res.metageneration 1199*9c5db199SXin Li 1200*9c5db199SXin Li def Stat(self, path, **kwargs): 1201*9c5db199SXin Li """Stat a GS file, and get detailed information. 1202*9c5db199SXin Li 1203*9c5db199SXin Li Args: 1204*9c5db199SXin Li path: A GS path for files to Stat. Wildcards are NOT supported. 1205*9c5db199SXin Li kwargs: Flags to pass to DoCommand. 1206*9c5db199SXin Li 1207*9c5db199SXin Li Returns: 1208*9c5db199SXin Li A GSStatResult object with all fields populated. 1209*9c5db199SXin Li 1210*9c5db199SXin Li Raises: 1211*9c5db199SXin Li Assorted GSContextException exceptions. 1212*9c5db199SXin Li """ 1213*9c5db199SXin Li try: 1214*9c5db199SXin Li res = self.DoCommand(['stat', '--', path], stdout=True, **kwargs) 1215*9c5db199SXin Li except GSCommandError as e: 1216*9c5db199SXin Li # Because the 'gsutil stat' command logs errors itself (instead of 1217*9c5db199SXin Li # raising errors internally like other commands), we have to look 1218*9c5db199SXin Li # for errors ourselves. See the related bug report here: 1219*9c5db199SXin Li # https://github.com/GoogleCloudPlatform/gsutil/issues/288 1220*9c5db199SXin Li # Example line: 1221*9c5db199SXin Li # No URLs matched gs://bucket/file 1222*9c5db199SXin Li if e.result.error and e.result.error.startswith('No URLs matched'): 1223*9c5db199SXin Li raise GSNoSuchKey('Stat Error: No URLs matched %s.' % path) 1224*9c5db199SXin Li 1225*9c5db199SXin Li # No idea what this is, so just choke. 1226*9c5db199SXin Li raise 1227*9c5db199SXin Li 1228*9c5db199SXin Li # In dryrun mode, DoCommand doesn't return an object, so we need to fake 1229*9c5db199SXin Li # out the behavior ourselves. 1230*9c5db199SXin Li if self.dry_run: 1231*9c5db199SXin Li return GSStatResult( 1232*9c5db199SXin Li creation_time=datetime.datetime.now(), 1233*9c5db199SXin Li content_length=0, 1234*9c5db199SXin Li content_type='application/octet-stream', 1235*9c5db199SXin Li hash_crc32c='AAAAAA==', 1236*9c5db199SXin Li hash_md5='', 1237*9c5db199SXin Li etag='', 1238*9c5db199SXin Li generation=0, 1239*9c5db199SXin Li metageneration=0) 1240*9c5db199SXin Li 1241*9c5db199SXin Li # We expect Stat output like the following. However, the Content-Language 1242*9c5db199SXin Li # line appears to be optional based on how the file in question was 1243*9c5db199SXin Li # created. 1244*9c5db199SXin Li # 1245*9c5db199SXin Li # gs://bucket/path/file: 1246*9c5db199SXin Li # Creation time: Sat, 23 Aug 2014 06:53:20 GMT 1247*9c5db199SXin Li # Content-Language: en 1248*9c5db199SXin Li # Content-Length: 74 1249*9c5db199SXin Li # Content-Type: application/octet-stream 1250*9c5db199SXin Li # Hash (crc32c): BBPMPA== 1251*9c5db199SXin Li # Hash (md5): ms+qSYvgI9SjXn8tW/5UpQ== 1252*9c5db199SXin Li # ETag: CNCgocbmqMACEAE= 1253*9c5db199SXin Li # Generation: 1408776800850000 1254*9c5db199SXin Li # Metageneration: 1 1255*9c5db199SXin Li 1256*9c5db199SXin Li if not res.output.startswith('gs://'): 1257*9c5db199SXin Li raise GSContextException('Unexpected stat output: %s' % res.output) 1258*9c5db199SXin Li 1259*9c5db199SXin Li def _GetField(name, optional=False): 1260*9c5db199SXin Li m = re.search(r'%s:\s*(.+)' % re.escape(name), res.output) 1261*9c5db199SXin Li if m: 1262*9c5db199SXin Li return m.group(1) 1263*9c5db199SXin Li elif optional: 1264*9c5db199SXin Li return None 1265*9c5db199SXin Li else: 1266*9c5db199SXin Li raise GSContextException('Field "%s" missing in "%s"' % 1267*9c5db199SXin Li (name, res.output)) 1268*9c5db199SXin Li 1269*9c5db199SXin Li return GSStatResult( 1270*9c5db199SXin Li creation_time=datetime.datetime.strptime( 1271*9c5db199SXin Li _GetField('Creation time'), '%a, %d %b %Y %H:%M:%S %Z'), 1272*9c5db199SXin Li content_length=int(_GetField('Content-Length')), 1273*9c5db199SXin Li content_type=_GetField('Content-Type'), 1274*9c5db199SXin Li hash_crc32c=_GetField('Hash (crc32c)'), 1275*9c5db199SXin Li hash_md5=_GetField('Hash (md5)', optional=True), 1276*9c5db199SXin Li etag=_GetField('ETag'), 1277*9c5db199SXin Li generation=int(_GetField('Generation')), 1278*9c5db199SXin Li metageneration=int(_GetField('Metageneration'))) 1279*9c5db199SXin Li 1280*9c5db199SXin Li def Counter(self, path): 1281*9c5db199SXin Li """Return a GSCounter object pointing at a |path| in Google Storage. 1282*9c5db199SXin Li 1283*9c5db199SXin Li Args: 1284*9c5db199SXin Li path: The path to the counter in Google Storage. 1285*9c5db199SXin Li """ 1286*9c5db199SXin Li return GSCounter(self, path) 1287*9c5db199SXin Li 1288*9c5db199SXin Li def WaitForGsPaths(self, paths, timeout, period=10): 1289*9c5db199SXin Li """Wait until a list of files exist in GS. 1290*9c5db199SXin Li 1291*9c5db199SXin Li Args: 1292*9c5db199SXin Li paths: The list of files to wait for. 1293*9c5db199SXin Li timeout: Max seconds to wait for file to appear. 1294*9c5db199SXin Li period: How often to check for files while waiting. 1295*9c5db199SXin Li 1296*9c5db199SXin Li Raises: 1297*9c5db199SXin Li timeout_util.TimeoutError if the timeout is reached. 1298*9c5db199SXin Li """ 1299*9c5db199SXin Li # Copy the list of URIs to wait for, so we don't modify the callers context. 1300*9c5db199SXin Li pending_paths = paths[:] 1301*9c5db199SXin Li 1302*9c5db199SXin Li def _CheckForExistence(): 1303*9c5db199SXin Li pending_paths[:] = [x for x in pending_paths if not self.Exists(x)] 1304*9c5db199SXin Li 1305*9c5db199SXin Li def _Retry(_return_value): 1306*9c5db199SXin Li # Retry, if there are any pending paths left. 1307*9c5db199SXin Li return pending_paths 1308*9c5db199SXin Li 1309*9c5db199SXin Li timeout_util.WaitForSuccess(_Retry, _CheckForExistence, 1310*9c5db199SXin Li timeout=timeout, period=period) 1311*9c5db199SXin Li 1312*9c5db199SXin Li def ContainsWildcard(self, url): 1313*9c5db199SXin Li """Checks whether url_string contains a wildcard. 1314*9c5db199SXin Li 1315*9c5db199SXin Li Args: 1316*9c5db199SXin Li url: URL string to check. 1317*9c5db199SXin Li 1318*9c5db199SXin Li Returns: 1319*9c5db199SXin Li True if |url| contains a wildcard. 1320*9c5db199SXin Li """ 1321*9c5db199SXin Li return bool(WILDCARD_REGEX.search(url)) 1322*9c5db199SXin Li 1323*9c5db199SXin Li def GetGsNamesWithWait(self, pattern, url, timeout=600, period=10, 1324*9c5db199SXin Li is_regex_pattern=False): 1325*9c5db199SXin Li """Returns the google storage names specified by the given pattern. 1326*9c5db199SXin Li 1327*9c5db199SXin Li This method polls Google Storage until the target files specified by the 1328*9c5db199SXin Li pattern is available or until the timeout occurs. Because we may not know 1329*9c5db199SXin Li the exact name of the target files, the method accepts a filename pattern, 1330*9c5db199SXin Li to identify whether a file whose name matches the pattern exists 1331*9c5db199SXin Li (e.g. use pattern '*_full_*' to search for the full payload 1332*9c5db199SXin Li 'chromeos_R17-1413.0.0-a1_x86-mario_full_dev.bin'). Returns the name only 1333*9c5db199SXin Li if found before the timeout. 1334*9c5db199SXin Li 1335*9c5db199SXin Li Warning: GS listing are not perfect, and are eventually consistent. Doing a 1336*9c5db199SXin Li search for file existence is a 'best effort'. Calling code should be aware 1337*9c5db199SXin Li and ready to handle that. 1338*9c5db199SXin Li 1339*9c5db199SXin Li Args: 1340*9c5db199SXin Li pattern: a path pattern (glob or regex) identifying the files we need. 1341*9c5db199SXin Li url: URL of the Google Storage bucket. 1342*9c5db199SXin Li timeout: how many seconds are we allowed to keep trying. 1343*9c5db199SXin Li period: how many seconds to wait between attempts. 1344*9c5db199SXin Li is_regex_pattern: Whether the pattern is a regex (otherwise a glob). 1345*9c5db199SXin Li 1346*9c5db199SXin Li Returns: 1347*9c5db199SXin Li The list of files matching the pattern in Google Storage bucket or None 1348*9c5db199SXin Li if the files are not found and hit the timeout_util.TimeoutError. 1349*9c5db199SXin Li """ 1350*9c5db199SXin Li def _GetGsName(): 1351*9c5db199SXin Li uploaded_list = [os.path.basename(p.url) for p in self.List(url)] 1352*9c5db199SXin Li 1353*9c5db199SXin Li if is_regex_pattern: 1354*9c5db199SXin Li filter_re = re.compile(pattern) 1355*9c5db199SXin Li matching_names = [f for f in uploaded_list 1356*9c5db199SXin Li if filter_re.search(f) is not None] 1357*9c5db199SXin Li else: 1358*9c5db199SXin Li matching_names = fnmatch.filter(uploaded_list, pattern) 1359*9c5db199SXin Li 1360*9c5db199SXin Li return matching_names 1361*9c5db199SXin Li 1362*9c5db199SXin Li try: 1363*9c5db199SXin Li matching_names = None 1364*9c5db199SXin Li if not (is_regex_pattern or self.ContainsWildcard(pattern)): 1365*9c5db199SXin Li try: 1366*9c5db199SXin Li self.WaitForGsPaths(['%s/%s' % (url, pattern)], timeout) 1367*9c5db199SXin Li return [os.path.basename(pattern)] 1368*9c5db199SXin Li except GSCommandError: 1369*9c5db199SXin Li pass 1370*9c5db199SXin Li 1371*9c5db199SXin Li if not matching_names: 1372*9c5db199SXin Li matching_names = timeout_util.WaitForSuccess( 1373*9c5db199SXin Li lambda x: not x, _GetGsName, timeout=timeout, period=period) 1374*9c5db199SXin Li 1375*9c5db199SXin Li logging.debug('matching_names=%s, is_regex_pattern=%r', 1376*9c5db199SXin Li matching_names, is_regex_pattern) 1377*9c5db199SXin Li return matching_names 1378*9c5db199SXin Li except timeout_util.TimeoutError: 1379*9c5db199SXin Li return None 1380*9c5db199SXin Li 1381*9c5db199SXin Li 1382*9c5db199SXin Lidef _FirstMatch(predicate, elems): 1383*9c5db199SXin Li """Returns the first element matching the given |predicate|. 1384*9c5db199SXin Li 1385*9c5db199SXin Li Args: 1386*9c5db199SXin Li predicate: A function which takes an element and returns a bool 1387*9c5db199SXin Li elems: A sequence of elements. 1388*9c5db199SXin Li """ 1389*9c5db199SXin Li matches = [x for x in elems if predicate(x)] 1390*9c5db199SXin Li return matches[0] if matches else None 1391*9c5db199SXin Li 1392*9c5db199SXin Li 1393*9c5db199SXin Lidef _FirstSubstring(superstring, haystack): 1394*9c5db199SXin Li """Returns the first elem of |haystack| which is a substring of |superstring|. 1395*9c5db199SXin Li 1396*9c5db199SXin Li Args: 1397*9c5db199SXin Li superstring: A string to search for substrings of. 1398*9c5db199SXin Li haystack: A sequence of strings to search through. 1399*9c5db199SXin Li """ 1400*9c5db199SXin Li return _FirstMatch(lambda s: s in superstring, haystack) 1401*9c5db199SXin Li 1402*9c5db199SXin Li 1403*9c5db199SXin Li@contextlib.contextmanager 1404*9c5db199SXin Lidef TemporaryURL(prefix): 1405*9c5db199SXin Li """Context manager to generate a random URL. 1406*9c5db199SXin Li 1407*9c5db199SXin Li At the end, the URL will be deleted. 1408*9c5db199SXin Li """ 1409*9c5db199SXin Li url = '%s/chromite-temp/%s/%s/%s' % (constants.TRASH_BUCKET, prefix, 1410*9c5db199SXin Li getpass.getuser(), 1411*9c5db199SXin Li cros_build_lib.GetRandomString()) 1412*9c5db199SXin Li ctx = GSContext() 1413*9c5db199SXin Li ctx.Remove(url, ignore_missing=True, recursive=True) 1414*9c5db199SXin Li try: 1415*9c5db199SXin Li yield url 1416*9c5db199SXin Li finally: 1417*9c5db199SXin Li ctx.Remove(url, ignore_missing=True, recursive=True) 1418