xref: /aosp_15_r20/external/autotest/utils/frozen_chromite/lib/gs.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# -*- coding: utf-8 -*-
2*9c5db199SXin Li# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
4*9c5db199SXin Li# found in the LICENSE file.
5*9c5db199SXin Li
6*9c5db199SXin Li"""Library to make common google storage operations more reliable."""
7*9c5db199SXin Li
8*9c5db199SXin Lifrom __future__ import print_function
9*9c5db199SXin Li
10*9c5db199SXin Liimport collections
11*9c5db199SXin Liimport contextlib
12*9c5db199SXin Liimport datetime
13*9c5db199SXin Liimport errno
14*9c5db199SXin Liimport fnmatch
15*9c5db199SXin Liimport getpass
16*9c5db199SXin Liimport glob
17*9c5db199SXin Liimport hashlib
18*9c5db199SXin Liimport os
19*9c5db199SXin Liimport re
20*9c5db199SXin Liimport shutil
21*9c5db199SXin Liimport subprocess
22*9c5db199SXin Liimport tempfile
23*9c5db199SXin Li
24*9c5db199SXin Liimport six
25*9c5db199SXin Lifrom six.moves import urllib
26*9c5db199SXin Li
27*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import constants
28*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cache
29*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cros_build_lib
30*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cros_collections
31*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cros_logging as logging
32*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import osutils
33*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import path_util
34*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import retry_stats
35*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import retry_util
36*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import signals
37*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import timeout_util
38*9c5db199SXin Li
39*9c5db199SXin Li
# This bucket has the allAuthenticatedUsers:READER ACL.
AUTHENTICATION_BUCKET = 'gs://chromeos-authentication-bucket/'

# Public path, only really works for files.
PUBLIC_BASE_HTTPS_URL = 'https://storage.googleapis.com/'

# Private path for files.
PRIVATE_BASE_HTTPS_URL = 'https://storage.cloud.google.com/'

# Private path for directories.
# TODO(akeshet): this is a workaround for b/27653354. If that is ultimately
# fixed, revisit this workaround.
PRIVATE_BASE_HTTPS_DOWNLOAD_URL = 'https://stainless.corp.google.com/browse/'

# Scheme prefix that identifies a Google Storage URI.
BASE_GS_URL = 'gs://'

# Format used by "gsutil ls -l" when reporting modified time.
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

# Regexp for parsing each line of output from "gsutil ls -l".
# This regexp is prepared for the generation and meta_generation values,
# too, even though they are not expected until we use "-a".
#
# A detailed listing looks like:
#    99908  2014-03-01T05:50:08Z  gs://bucket/foo/abc#1234  metageneration=1
#                                 gs://bucket/foo/adir/
#    99908  2014-03-04T01:16:55Z  gs://bucket/foo/def#5678  metageneration=1
# TOTAL: 2 objects, 199816 bytes (495.36 KB)
LS_LA_RE = re.compile(
    r'^\s*(?P<content_length>\d*?)\s+'
    r'(?P<creation_time>\S*?)\s+'
    r'(?P<url>[^#$]+).*?'
    r'('
    r'#(?P<generation>\d+)\s+'
    r'meta_?generation=(?P<metageneration>\d+)'
    r')?\s*$')
# Regexp exposing the same group names as LS_LA_RE, but only the url group
# can capture text; every other named group is an empty pattern.
LS_RE = re.compile(r'^\s*(?P<content_length>)(?P<creation_time>)(?P<url>.*)'
                   r'(?P<generation>)(?P<metageneration>)\s*$')

# Format used by ContainsWildCard, which is duplicated from
# https://github.com/GoogleCloudPlatform/gsutil/blob/v4.21/gslib/storage_url.py#L307.
# Matches any single one of the characters: * ? [ ]
WILDCARD_REGEX = re.compile(r'[*?\[\]]')
81*9c5db199SXin Li
82*9c5db199SXin Li
def PathIsGs(path):
  """Report whether |path| starts with the Google Storage URI scheme.

  Args:
    path: The string to inspect.

  Returns:
    True if |path| begins with BASE_GS_URL, False otherwise.
  """
  is_gs_uri = path.startswith(BASE_GS_URL)
  return is_gs_uri
86*9c5db199SXin Li
87*9c5db199SXin Li
def CanonicalizeURL(url, strict=False):
  """Rewrite a recognized https URL into its gs:// equivalent.

  Args:
    url: URL to canonicalize.
    strict: If True, raise when |url| matches no known https prefix and is
      not already a gs:// URL.

  Returns:
    The gs:// form of |url| when it starts with a known prefix; otherwise
    |url| unchanged.

  Raises:
    ValueError: |strict| is set and |url| could not be canonicalized.
  """
  known_prefixes = (
      PUBLIC_BASE_HTTPS_URL,
      PRIVATE_BASE_HTTPS_URL,
      PRIVATE_BASE_HTTPS_DOWNLOAD_URL,
      'https://pantheon.corp.google.com/storage/browser/',
      'https://commondatastorage.googleapis.com/',
  )
  # Prefixes are tried in order; the first one that matches wins.
  matched = next((p for p in known_prefixes if url.startswith(p)), None)
  if matched is not None:
    return url.replace(matched, BASE_GS_URL, 1)

  if strict and not PathIsGs(url):
    raise ValueError('Url %r cannot be canonicalized.' % url)

  return url
107*9c5db199SXin Li
108*9c5db199SXin Li
def GetGsURL(bucket, for_gsutil=False, public=True, suburl=''):
  """Build the URL for a location inside a Google Storage bucket.

  Args:
    bucket: The Google Storage bucket to use.
    for_gsutil: If True, return the gs:// form suitable for `gsutil`.
    public: Whether the https form should be the public or the private one.
      Only consulted when |for_gsutil| is False.
    suburl: A url fragment appended after the bucket.

  Returns:
    The fully constructed URL.
  """
  gs_url = 'gs://%s/%s' % (bucket, suburl)
  return gs_url if for_gsutil else GsUrlToHttp(gs_url, public=public)
127*9c5db199SXin Li
128*9c5db199SXin Li
def GsUrlToHttp(path, public=True, directory=False):
  """Translate a gs:// URL into an https URL for the same resource.

  The https URLs are not fixed (and may not always be simple prefix
  replacements), so the conversion is centralized here.

  Directories get a different private URL than files because the Web UIs for
  GS are inconsistent. Public URLs for directories probably do not work
  (permissions as well as UI), but this is not enforced.

  e.g. 'gs://chromeos-image-archive/path/file' ->
       'https://storage.googleapis.com/chromeos-image-archive/path/file'

  Args:
    path: GS URL to convert.
    public: Is this URL for public access (True) or Googler access (False)?
    directory: Force this URL to be treated as a directory? When False, a
      trailing '/' on |path| still marks it as a directory.

  Returns:
    https URL as a string.
  """
  assert PathIsGs(path)

  if public:
    https_base = PUBLIC_BASE_HTTPS_URL
  elif directory or path.endswith('/'):
    https_base = PRIVATE_BASE_HTTPS_DOWNLOAD_URL
  else:
    https_base = PRIVATE_BASE_HTTPS_URL

  # Only the leading scheme is swapped; the rest of the path is untouched.
  return path.replace(BASE_GS_URL, https_base, 1)
164*9c5db199SXin Li
165*9c5db199SXin Li
class GSContextException(Exception):
  """Base exception for all exceptions thrown by GSContext.

  The other GS* exception classes in this module derive from it.
  """
168*9c5db199SXin Li
169*9c5db199SXin Li
# Since the underlying code uses run, some callers might be trying to
# catch cros_build_lib.RunCommandError themselves.  Extend that class so that
# code continues to work.
class GSCommandError(GSContextException, cros_build_lib.RunCommandError):
  """Thrown when an error happened we couldn't decode.

  Catchable both as GSContextException and as cros_build_lib.RunCommandError.
  """
175*9c5db199SXin Li
176*9c5db199SXin Li
class GSContextPreconditionFailed(GSContextException):
  """Thrown when Google Storage returns code=PreconditionFailed."""
179*9c5db199SXin Li
180*9c5db199SXin Li
class GSNoSuchKey(GSContextException):
  """Thrown when Google Storage returns code=NoSuchKey."""
183*9c5db199SXin Li
184*9c5db199SXin Li
# Detailed results of GSContext.Stat.
#
# The fields directly correspond to gsutil stat results.
#
#  Field name        Type         Example
#   creation_time     datetime     Sat, 23 Aug 2014 06:53:20 GMT
#   content_length    int          74
#   content_type      string       application/octet-stream
#   hash_crc32c       string       BBPMPA==
#   hash_md5          string       ms+qSYvgI9SjXn8tW/5UpQ==
#   etag              string       CNCgocbmqMACEAE=
#   generation        int          1408776800850000
#   metageneration    int          1
#
# Note: We omit a few stat fields as they are not always available, and we
# have no callers that want this currently.
#
#   content_language  string/None  en   # This field may be None.
GSStatResult = collections.namedtuple('GSStatResult', [
    'creation_time',
    'content_length',
    'content_type',
    'hash_crc32c',
    'hash_md5',
    'etag',
    'generation',
    'metageneration',
])
207*9c5db199SXin Li
208*9c5db199SXin Li
# Detailed results of GSContext.List.
#
# The field names mirror the named groups of the LS_LA_RE / LS_RE regexps.
GSListResult = collections.namedtuple('GSListResult', [
    'url',
    'creation_time',
    'content_length',
    'generation',
    'metageneration',
])
213*9c5db199SXin Li
214*9c5db199SXin Li
# Record type built by cros_collections.Collection with the keyword defaults
# listed below.
# NOTE(review): presumably used to classify gsutil failures (type, message
# pattern, whether to retry, exception to raise) -- confirm against the code
# that consumes it.
ErrorDetails = cros_collections.Collection(
    'ErrorDetails',
    type=None, message_pattern='', retriable=None, exception=None)
218*9c5db199SXin Li
219*9c5db199SXin Li
class GSCounter(object):
  """An integer counter stored as an object in Google Storage."""

  def __init__(self, ctx, path):
    """Create a counter object.

    Args:
      ctx: A GSContext object.
      path: The path to the counter in Google Storage.
    """
    self.ctx = ctx
    self.path = path

  def Get(self):
    """Read the counter; a counter that does not exist reads as 0."""
    try:
      current = int(self.ctx.Cat(self.path))
    except GSNoSuchKey:
      current = 0
    return current

  def AtomicCounterOperation(self, default_value, operation):
    """Atomically set the counter value using |operation|.

    The write is conditioned on the object's generation, so a concurrent
    update makes the copy fail instead of being silently overwritten.

    Args:
      default_value: Value to store if the counter does not exist yet
        (i.e. its generation is 0).
      operation: Function that takes the current counter value as a
        parameter, and returns the new desired value.

    Returns:
      The new counter value. None if value could not be set.
    """
    expected_generation, _ = self.ctx.GetGeneration(self.path)
    for _attempt in range(self.ctx.retries + 1):
      try:
        if expected_generation == 0:
          new_value = default_value
        else:
          new_value = operation(self.Get())
        self.ctx.Copy('-', self.path, input=str(new_value),
                      version=expected_generation)
        return new_value
      except (GSContextPreconditionFailed, GSNoSuchKey):
        # GSContextPreconditionFailed: another builder updated the counter
        # first and we lost the race. GSNoSuchKey: another builder deleted
        # the counter. Either way, re-read the generation; retry only if it
        # moved, otherwise the failure is not a race and is re-raised.
        latest_generation, _ = self.ctx.GetGeneration(self.path)
        if latest_generation == expected_generation:
          raise
        expected_generation = latest_generation
    # Every attempt lost the race.
    return None

  def Increment(self):
    """Add one to the counter (a missing counter becomes 1).

    Returns:
      The new counter value. None if value could not be set.
    """
    return self.AtomicCounterOperation(1, lambda count: count + 1)

  def Decrement(self):
    """Subtract one from the counter (a missing counter becomes -1).

    Returns:
      The new counter value. None if value could not be set.
    """
    return self.AtomicCounterOperation(-1, lambda count: count - 1)

  def Reset(self):
    """Reset the counter to zero.

    Returns:
      The new counter value. None if value could not be set.
    """
    return self.AtomicCounterOperation(0, lambda _count: 0)

  def StreakIncrement(self):
    """Increment the counter if it is positive, otherwise set it to 1.

    Returns:
      The new counter value. None if value could not be set.
    """
    # Clamping at 0 first maps every non-positive count to 1.
    return self.AtomicCounterOperation(1, lambda count: max(count, 0) + 1)

  def StreakDecrement(self):
    """Decrement the counter if it is negative, otherwise set it to -1.

    Returns:
      The new counter value. None if value could not be set.
    """
    # Clamping at 0 first maps every non-negative count to -1.
    return self.AtomicCounterOperation(-1, lambda count: min(count, 0) - 1)
307*9c5db199SXin Li
308*9c5db199SXin Li
309*9c5db199SXin Liclass GSContext(object):
310*9c5db199SXin Li  """A class to wrap common google storage operations."""
311*9c5db199SXin Li
  # Error messages that indicate an invalid BOTO config.
  AUTHORIZATION_ERRORS = ('no configured', 'none configured',
                          'detail=Authorization', '401 Anonymous caller')

  # Default credential file and gsutil tracker directory, both under the
  # current user's home directory.
  DEFAULT_BOTO_FILE = os.path.expanduser('~/.boto')
  DEFAULT_GSUTIL_TRACKER_DIR = os.path.expanduser('~/.gsutil/tracker-files')
  # This is set for ease of testing.
  DEFAULT_GSUTIL_BIN = None
  DEFAULT_GSUTIL_BUILDER_BIN = '/b/build/third_party/gsutil/gsutil'
  # How many times to retry uploads.
  DEFAULT_RETRIES = 3

  # Multiplier for how long to sleep (in seconds) between retries; will delay
  # (1*sleep) the first time, then (2*sleep), continuing via attempt * sleep.
  DEFAULT_SLEEP_TIME = 60

  # Pinned gsutil release, the tarball name derived from it, and the URL the
  # tarball is fetched from.
  GSUTIL_VERSION = '4.51'
  GSUTIL_TAR = 'gsutil_%s.tar.gz' % GSUTIL_VERSION
  GSUTIL_URL = (PUBLIC_BASE_HTTPS_URL +
                'chromeos-mirror/gentoo/distfiles/%s' % GSUTIL_TAR)
  GSUTIL_API_SELECTOR = 'JSON'

  RESUMABLE_UPLOAD_ERROR = (b'Too many resumable upload attempts failed '
                            b'without progress')
  RESUMABLE_DOWNLOAD_ERROR = (b'Too many resumable download attempts failed '
                              b'without progress')

  # TODO: Below is a list of known flaky errors that we should
  # retry. The list needs to be extended.
  # Note that these patterns are bytes, not text.
  RESUMABLE_ERROR_MESSAGE = (
      RESUMABLE_DOWNLOAD_ERROR,
      RESUMABLE_UPLOAD_ERROR,
      b'ResumableUploadException',
      b'ResumableUploadAbortException',
      b'ResumableDownloadException',
      b'ssl.SSLError: The read operation timed out',
      # TODO: Error messages may change in different library versions,
      # use regexes to match resumable error messages.
      b"ssl.SSLError: ('The read operation timed out',)",
      b'ssl.SSLError: _ssl.c:495: The handshake operation timed out',
      b'Unable to find the server',
      b"doesn't match cloud-supplied digest",
      b'ssl.SSLError: [Errno 8]',
      b'EOF occurred in violation of protocol',
      # TODO(nxia): crbug.com/775330 narrow down the criteria for retrying
      b'AccessDeniedException',
  )

  # We have seen flaky errors with 5xx return codes
  # See b/17376491 for the "JSON decoding" error.
  # We have seen transient Oauth 2.0 credential errors (crbug.com/414345).
  TRANSIENT_ERROR_MESSAGE = (
      b'ServiceException: 5',
      b'Failure: No JSON object could be decoded',
      b'Oauth 2.0 User Account',
      b'InvalidAccessKeyId',
      b'socket.error: [Errno 104] Connection reset by peer',
      b'Received bad request from server',
      b"can't start new thread",
  )
371*9c5db199SXin Li  )
372*9c5db199SXin Li
373*9c5db199SXin Li  @classmethod
374*9c5db199SXin Li  def GetDefaultGSUtilBin(cls, cache_dir=None, cache_user=None):
375*9c5db199SXin Li    if cls.DEFAULT_GSUTIL_BIN is None:
376*9c5db199SXin Li      if cache_dir is None:
377*9c5db199SXin Li        cache_dir = path_util.GetCacheDir()
378*9c5db199SXin Li      if cache_dir is not None:
379*9c5db199SXin Li        common_path = os.path.join(cache_dir, constants.COMMON_CACHE)
380*9c5db199SXin Li        tar_cache = cache.TarballCache(common_path, cache_user=cache_user)
381*9c5db199SXin Li        key = (cls.GSUTIL_TAR,)
382*9c5db199SXin Li        # The common cache will not be LRU, removing the need to hold a read
383*9c5db199SXin Li        # lock on the cached gsutil.
384*9c5db199SXin Li        ref = tar_cache.Lookup(key)
385*9c5db199SXin Li        ref.SetDefault(cls.GSUTIL_URL)
386*9c5db199SXin Li        cls.DEFAULT_GSUTIL_BIN = os.path.join(ref.path, 'gsutil', 'gsutil')
387*9c5db199SXin Li        cls._CompileCrcmod(ref.path)
388*9c5db199SXin Li      else:
389*9c5db199SXin Li        # Check if the default gsutil path for builders exists. If
390*9c5db199SXin Li        # not, try locating gsutil. If none exists, simply use 'gsutil'.
391*9c5db199SXin Li        gsutil_bin = cls.DEFAULT_GSUTIL_BUILDER_BIN
392*9c5db199SXin Li        if not os.path.exists(gsutil_bin):
393*9c5db199SXin Li          gsutil_bin = osutils.Which('gsutil')
394*9c5db199SXin Li        if gsutil_bin is None:
395*9c5db199SXin Li          gsutil_bin = 'gsutil'
396*9c5db199SXin Li        cls.DEFAULT_GSUTIL_BIN = gsutil_bin
397*9c5db199SXin Li
398*9c5db199SXin Li    return cls.DEFAULT_GSUTIL_BIN
399*9c5db199SXin Li
400*9c5db199SXin Li  @classmethod
401*9c5db199SXin Li  def _CompileCrcmod(cls, path):
402*9c5db199SXin Li    """Try to setup a compiled crcmod for gsutil.
403*9c5db199SXin Li
404*9c5db199SXin Li    The native crcmod code is much faster than the python implementation, and
405*9c5db199SXin Li    enables some more features (otherwise gsutil internally disables them).
406*9c5db199SXin Li    Try to compile the module on demand in the crcmod tree bundled with gsutil.
407*9c5db199SXin Li
408*9c5db199SXin Li    For more details, see:
409*9c5db199SXin Li    https://cloud.google.com/storage/docs/gsutil/addlhelp/CRC32CandInstallingcrcmod
410*9c5db199SXin Li    """
411*9c5db199SXin Li    src_root = os.path.join(path, 'gsutil', 'third_party', 'crcmod')
412*9c5db199SXin Li
413*9c5db199SXin Li    # Try to build it once.
414*9c5db199SXin Li    flag = os.path.join(src_root, '.chromite.tried.build')
415*9c5db199SXin Li    if os.path.exists(flag):
416*9c5db199SXin Li      return
417*9c5db199SXin Li    # Flag things now regardless of how the attempt below works out.
418*9c5db199SXin Li    try:
419*9c5db199SXin Li      osutils.Touch(flag)
420*9c5db199SXin Li    except IOError as e:
421*9c5db199SXin Li      # If the gsutil dir was cached previously as root, but now we're
422*9c5db199SXin Li      # non-root, just flag it and return.
423*9c5db199SXin Li      if e.errno == errno.EACCES:
424*9c5db199SXin Li        logging.debug('Skipping gsutil crcmod compile due to permissions')
425*9c5db199SXin Li        cros_build_lib.sudo_run(['touch', flag], debug_level=logging.DEBUG)
426*9c5db199SXin Li        return
427*9c5db199SXin Li      else:
428*9c5db199SXin Li        raise
429*9c5db199SXin Li
430*9c5db199SXin Li    # See if the system includes one in which case we're done.
431*9c5db199SXin Li    # We probe `python` as that's what gsutil uses for its shebang.
432*9c5db199SXin Li    result = cros_build_lib.run(
433*9c5db199SXin Li        ['python', '-c', 'from crcmod.crcmod import _usingExtension; '
434*9c5db199SXin Li         'exit(0 if _usingExtension else 1)'], check=False, capture_output=True)
435*9c5db199SXin Li    if result.returncode == 0:
436*9c5db199SXin Li      return
437*9c5db199SXin Li
438*9c5db199SXin Li    # See if the local copy has one.
439*9c5db199SXin Li    for pyver in ('python2', 'python3'):
440*9c5db199SXin Li      logging.debug('Attempting to compile local crcmod for %s gsutil', pyver)
441*9c5db199SXin Li      with osutils.TempDir(prefix='chromite.gsutil.crcmod') as tempdir:
442*9c5db199SXin Li        result = cros_build_lib.run(
443*9c5db199SXin Li            [pyver, 'setup.py', 'build', '--build-base', tempdir,
444*9c5db199SXin Li             '--build-platlib', tempdir],
445*9c5db199SXin Li            cwd=src_root, capture_output=True, check=False,
446*9c5db199SXin Li            debug_level=logging.DEBUG)
447*9c5db199SXin Li        if result.returncode:
448*9c5db199SXin Li          continue
449*9c5db199SXin Li
450*9c5db199SXin Li        # Locate the module in the build dir.
451*9c5db199SXin Li        copied = False
452*9c5db199SXin Li        for mod_path in glob.glob(
453*9c5db199SXin Li            os.path.join(tempdir, 'crcmod', '_crcfunext*.so')):
454*9c5db199SXin Li          dst_mod_path = os.path.join(src_root, pyver, 'crcmod',
455*9c5db199SXin Li                                      os.path.basename(mod_path))
456*9c5db199SXin Li          try:
457*9c5db199SXin Li            shutil.copy2(mod_path, dst_mod_path)
458*9c5db199SXin Li            copied = True
459*9c5db199SXin Li          except shutil.Error:
460*9c5db199SXin Li            pass
461*9c5db199SXin Li
462*9c5db199SXin Li        if not copied:
463*9c5db199SXin Li          # If the module compile failed (missing compiler/headers/whatever),
464*9c5db199SXin Li          # then the setup.py build command above would have passed, but there
465*9c5db199SXin Li          # won't actually be a _crcfunext.so module.  Check for it here to
466*9c5db199SXin Li          # disambiguate other errors from shutil.copy2.
467*9c5db199SXin Li          logging.debug('No crcmod module produced (missing host compiler?)')
468*9c5db199SXin Li          continue
469*9c5db199SXin Li
470*9c5db199SXin Li  def __init__(self, boto_file=None, cache_dir=None, acl=None,
471*9c5db199SXin Li               dry_run=False, gsutil_bin=None, init_boto=False, retries=None,
472*9c5db199SXin Li               sleep=None, cache_user=None):
473*9c5db199SXin Li    """Constructor.
474*9c5db199SXin Li
475*9c5db199SXin Li    Args:
476*9c5db199SXin Li      boto_file: Fully qualified path to user's .boto credential file.
477*9c5db199SXin Li      cache_dir: The absolute path to the cache directory. Use the default
478*9c5db199SXin Li        fallback if not given.
479*9c5db199SXin Li      acl: If given, a canned ACL. It is not valid to pass in an ACL file
480*9c5db199SXin Li        here, because most gsutil commands do not accept ACL files. If you
481*9c5db199SXin Li        would like to use an ACL file, use the SetACL command instead.
482*9c5db199SXin Li      dry_run: Testing mode that prints commands that would be run.
483*9c5db199SXin Li      gsutil_bin: If given, the absolute path to the gsutil binary.  Else
484*9c5db199SXin Li        the default fallback will be used.
485*9c5db199SXin Li      init_boto: If set to True, GSContext will check during __init__ if a
486*9c5db199SXin Li        valid boto config is configured, and if not, will attempt to ask the
487*9c5db199SXin Li        user to interactively set up the boto config.
488*9c5db199SXin Li      retries: Number of times to retry a command before failing.
489*9c5db199SXin Li      sleep: Amount of time to sleep between failures.
490*9c5db199SXin Li      cache_user: user for creating cache_dir for gsutil. Default is None.
491*9c5db199SXin Li    """
492*9c5db199SXin Li    if gsutil_bin is None:
493*9c5db199SXin Li      gsutil_bin = self.GetDefaultGSUtilBin(cache_dir, cache_user=cache_user)
494*9c5db199SXin Li    else:
495*9c5db199SXin Li      self._CheckFile('gsutil not found', gsutil_bin)
496*9c5db199SXin Li    self.gsutil_bin = gsutil_bin
497*9c5db199SXin Li
498*9c5db199SXin Li    # The version of gsutil is retrieved on demand and cached here.
499*9c5db199SXin Li    self._gsutil_version = None
500*9c5db199SXin Li
501*9c5db199SXin Li    # Increase the number of retries. With 10 retries, Boto will try a total of
502*9c5db199SXin Li    # 11 times and wait up to 2**11 seconds (~30 minutes) in total, not
503*9c5db199SXin Li    # not including the time spent actually uploading or downloading.
504*9c5db199SXin Li    self.gsutil_flags = ['-o', 'Boto:num_retries=10']
505*9c5db199SXin Li
506*9c5db199SXin Li    # Set HTTP proxy if environment variable http_proxy is set
507*9c5db199SXin Li    # (crbug.com/325032).
508*9c5db199SXin Li    if 'http_proxy' in os.environ:
509*9c5db199SXin Li      url = urllib.parse.urlparse(os.environ['http_proxy'])
510*9c5db199SXin Li      if not url.hostname or (not url.username and url.password):
511*9c5db199SXin Li        logging.warning('GS_ERROR: Ignoring env variable http_proxy because it '
512*9c5db199SXin Li                        'is not properly set: %s', os.environ['http_proxy'])
513*9c5db199SXin Li      else:
514*9c5db199SXin Li        self.gsutil_flags += ['-o', 'Boto:proxy=%s' % url.hostname]
515*9c5db199SXin Li        if url.username:
516*9c5db199SXin Li          self.gsutil_flags += ['-o', 'Boto:proxy_user=%s' % url.username]
517*9c5db199SXin Li        if url.password:
518*9c5db199SXin Li          self.gsutil_flags += ['-o', 'Boto:proxy_pass=%s' % url.password]
519*9c5db199SXin Li        if url.port:
520*9c5db199SXin Li          self.gsutil_flags += ['-o', 'Boto:proxy_port=%d' % url.port]
521*9c5db199SXin Li
522*9c5db199SXin Li    # Prefer boto_file if specified, else prefer the env then the default.
523*9c5db199SXin Li    if boto_file is None:
524*9c5db199SXin Li      boto_file = os.environ.get('BOTO_CONFIG')
525*9c5db199SXin Li    if boto_file is None and os.path.isfile(self.DEFAULT_BOTO_FILE):
526*9c5db199SXin Li      # Only set boto file to DEFAULT_BOTO_FILE if it exists.
527*9c5db199SXin Li      boto_file = self.DEFAULT_BOTO_FILE
528*9c5db199SXin Li
529*9c5db199SXin Li    self.boto_file = boto_file
530*9c5db199SXin Li
531*9c5db199SXin Li    self.acl = acl
532*9c5db199SXin Li
533*9c5db199SXin Li    self.dry_run = dry_run
534*9c5db199SXin Li    self.retries = self.DEFAULT_RETRIES if retries is None else int(retries)
535*9c5db199SXin Li    self._sleep_time = self.DEFAULT_SLEEP_TIME if sleep is None else int(sleep)
536*9c5db199SXin Li
537*9c5db199SXin Li    if init_boto and not dry_run:
538*9c5db199SXin Li      # We can't really expect gsutil to even be present in dry_run mode.
539*9c5db199SXin Li      self._InitBoto()
540*9c5db199SXin Li
541*9c5db199SXin Li  @property
542*9c5db199SXin Li  def gsutil_version(self):
543*9c5db199SXin Li    """Return the version of the gsutil in this context."""
544*9c5db199SXin Li    if not self._gsutil_version:
545*9c5db199SXin Li      if self.dry_run:
546*9c5db199SXin Li        self._gsutil_version = self.GSUTIL_VERSION
547*9c5db199SXin Li      else:
548*9c5db199SXin Li        cmd = ['-q', 'version']
549*9c5db199SXin Li
550*9c5db199SXin Li        # gsutil has been known to return version to stderr in the past, so
551*9c5db199SXin Li        # use stderr=subprocess.STDOUT.
552*9c5db199SXin Li        result = self.DoCommand(cmd, stdout=True, stderr=subprocess.STDOUT)
553*9c5db199SXin Li
554*9c5db199SXin Li        # Expect output like: 'gsutil version 3.35' or 'gsutil version: 4.5'.
555*9c5db199SXin Li        match = re.search(r'^\s*gsutil\s+version:?\s+([\d.]+)', result.output,
556*9c5db199SXin Li                          re.IGNORECASE)
557*9c5db199SXin Li        if match:
558*9c5db199SXin Li          self._gsutil_version = match.group(1)
559*9c5db199SXin Li        else:
560*9c5db199SXin Li          raise GSContextException('Unexpected output format from "%s":\n%s.' %
561*9c5db199SXin Li                                   (result.cmdstr, result.output))
562*9c5db199SXin Li
563*9c5db199SXin Li    return self._gsutil_version
564*9c5db199SXin Li
565*9c5db199SXin Li  def _CheckFile(self, errmsg, afile):
566*9c5db199SXin Li    """Pre-flight check for valid inputs.
567*9c5db199SXin Li
568*9c5db199SXin Li    Args:
569*9c5db199SXin Li      errmsg: Error message to display.
570*9c5db199SXin Li      afile: Fully qualified path to test file existance.
571*9c5db199SXin Li    """
572*9c5db199SXin Li    if not os.path.isfile(afile):
573*9c5db199SXin Li      raise GSContextException('%s, %s is not a file' % (errmsg, afile))
574*9c5db199SXin Li
575*9c5db199SXin Li  def _TestGSLs(self):
576*9c5db199SXin Li    """Quick test of gsutil functionality."""
577*9c5db199SXin Li    # The bucket in question is readable by any authenticated account.
578*9c5db199SXin Li    # If we can list it's contents, we have valid authentication.
579*9c5db199SXin Li    cmd = ['ls', AUTHENTICATION_BUCKET]
580*9c5db199SXin Li    result = self.DoCommand(cmd, retries=0, debug_level=logging.DEBUG,
581*9c5db199SXin Li                            stderr=True, check=False)
582*9c5db199SXin Li
583*9c5db199SXin Li    # Did we fail with an authentication error?
584*9c5db199SXin Li    if (result.returncode == 1 and
585*9c5db199SXin Li        any(e in result.error for e in self.AUTHORIZATION_ERRORS)):
586*9c5db199SXin Li      logging.warning('gsutil authentication failure msg: %s', result.error)
587*9c5db199SXin Li      return False
588*9c5db199SXin Li
589*9c5db199SXin Li    return True
590*9c5db199SXin Li
591*9c5db199SXin Li  def _ConfigureBotoConfig(self):
592*9c5db199SXin Li    """Make sure we can access protected bits in GS."""
593*9c5db199SXin Li    print('Configuring gsutil. **Please use your @google.com account.**')
594*9c5db199SXin Li    try:
595*9c5db199SXin Li      if not self.boto_file:
596*9c5db199SXin Li        self.boto_file = self.DEFAULT_BOTO_FILE
597*9c5db199SXin Li      self.DoCommand(['config'], retries=0, debug_level=logging.CRITICAL,
598*9c5db199SXin Li                     print_cmd=False)
599*9c5db199SXin Li    finally:
600*9c5db199SXin Li      if (os.path.exists(self.boto_file) and not
601*9c5db199SXin Li          os.path.getsize(self.boto_file)):
602*9c5db199SXin Li        os.remove(self.boto_file)
603*9c5db199SXin Li        raise GSContextException('GS config could not be set up.')
604*9c5db199SXin Li
605*9c5db199SXin Li  def _InitBoto(self):
606*9c5db199SXin Li    if not self._TestGSLs():
607*9c5db199SXin Li      self._ConfigureBotoConfig()
608*9c5db199SXin Li
609*9c5db199SXin Li  def Cat(self, path, **kwargs):
610*9c5db199SXin Li    """Returns the contents of a GS object."""
611*9c5db199SXin Li    kwargs.setdefault('stdout', True)
612*9c5db199SXin Li    encoding = kwargs.setdefault('encoding', None)
613*9c5db199SXin Li    errors = kwargs.setdefault('errors', None)
614*9c5db199SXin Li    if not PathIsGs(path):
615*9c5db199SXin Li      # gsutil doesn't support cat-ting a local path, so read it ourselves.
616*9c5db199SXin Li      mode = 'rb' if encoding is None else 'r'
617*9c5db199SXin Li      try:
618*9c5db199SXin Li        return osutils.ReadFile(path, mode=mode, encoding=encoding,
619*9c5db199SXin Li                                errors=errors)
620*9c5db199SXin Li      except Exception as e:
621*9c5db199SXin Li        if getattr(e, 'errno', None) == errno.ENOENT:
622*9c5db199SXin Li          raise GSNoSuchKey('Cat Error: file %s does not exist' % path)
623*9c5db199SXin Li        else:
624*9c5db199SXin Li          raise GSContextException(str(e))
625*9c5db199SXin Li    elif self.dry_run:
626*9c5db199SXin Li      return b'' if encoding is None else ''
627*9c5db199SXin Li    else:
628*9c5db199SXin Li      return self.DoCommand(['cat', path], **kwargs).output
629*9c5db199SXin Li
630*9c5db199SXin Li  def StreamingCat(self, path, chunksize=0x100000):
631*9c5db199SXin Li    """Returns the content of a GS file as a stream.
632*9c5db199SXin Li
633*9c5db199SXin Li    Unlike Cat or Copy, this function doesn't support any internal retry or
634*9c5db199SXin Li    validation by computing checksum of downloaded data. Users should perform
635*9c5db199SXin Li    their own validation, or use Cat() instead.
636*9c5db199SXin Li
637*9c5db199SXin Li    Args:
638*9c5db199SXin Li      path: Full gs:// path of the src file.
639*9c5db199SXin Li      chunksize: At most how much data read from upstream and yield to callers
640*9c5db199SXin Li        at a time. The default value is 1 MB.
641*9c5db199SXin Li
642*9c5db199SXin Li    Yields:
643*9c5db199SXin Li      The file content, chunk by chunk, as bytes.
644*9c5db199SXin Li    """
645*9c5db199SXin Li    assert PathIsGs(path)
646*9c5db199SXin Li
647*9c5db199SXin Li    if self.dry_run:
648*9c5db199SXin Li      return (lambda: (yield ''))()
649*9c5db199SXin Li
650*9c5db199SXin Li    cmd = [self.gsutil_bin] + self.gsutil_flags + ['cat', path]
651*9c5db199SXin Li    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
652*9c5db199SXin Li
653*9c5db199SXin Li    def read_content():
654*9c5db199SXin Li      try:
655*9c5db199SXin Li        while True:
656*9c5db199SXin Li          data = proc.stdout.read(chunksize)
657*9c5db199SXin Li          if not data and proc.poll() is not None:
658*9c5db199SXin Li            break
659*9c5db199SXin Li          if data:
660*9c5db199SXin Li            yield data
661*9c5db199SXin Li
662*9c5db199SXin Li        rc = proc.poll()
663*9c5db199SXin Li        if rc:
664*9c5db199SXin Li          raise GSCommandError(
665*9c5db199SXin Li              'Cannot stream cat %s from Google Storage!' % path, rc, None)
666*9c5db199SXin Li      finally:
667*9c5db199SXin Li        if proc.returncode is None:
668*9c5db199SXin Li          proc.stdout.close()
669*9c5db199SXin Li          proc.terminate()
670*9c5db199SXin Li
671*9c5db199SXin Li    return read_content()
672*9c5db199SXin Li
673*9c5db199SXin Li  def CopyInto(self, local_path, remote_dir, filename=None, **kwargs):
674*9c5db199SXin Li    """Upload a local file into a directory in google storage.
675*9c5db199SXin Li
676*9c5db199SXin Li    Args:
677*9c5db199SXin Li      local_path: Local file path to copy.
678*9c5db199SXin Li      remote_dir: Full gs:// url of the directory to transfer the file into.
679*9c5db199SXin Li      filename: If given, the filename to place the content at; if not given,
680*9c5db199SXin Li        it's discerned from basename(local_path).
681*9c5db199SXin Li      **kwargs: See Copy() for documentation.
682*9c5db199SXin Li
683*9c5db199SXin Li    Returns:
684*9c5db199SXin Li      The generation of the remote file.
685*9c5db199SXin Li    """
686*9c5db199SXin Li    filename = filename if filename is not None else local_path
687*9c5db199SXin Li    # Basename it even if an explicit filename was given; we don't want
688*9c5db199SXin Li    # people using filename as a multi-directory path fragment.
689*9c5db199SXin Li    return self.Copy(local_path,
690*9c5db199SXin Li                     '%s/%s' % (remote_dir, os.path.basename(filename)),
691*9c5db199SXin Li                     **kwargs)
692*9c5db199SXin Li
693*9c5db199SXin Li  @staticmethod
694*9c5db199SXin Li  def GetTrackerFilenames(dest_path):
695*9c5db199SXin Li    """Returns a list of gsutil tracker filenames.
696*9c5db199SXin Li
697*9c5db199SXin Li    Tracker files are used by gsutil to resume downloads/uploads. This
698*9c5db199SXin Li    function does not handle parallel uploads.
699*9c5db199SXin Li
700*9c5db199SXin Li    Args:
701*9c5db199SXin Li      dest_path: Either a GS path or an absolute local path.
702*9c5db199SXin Li
703*9c5db199SXin Li    Returns:
704*9c5db199SXin Li      The list of potential tracker filenames.
705*9c5db199SXin Li    """
706*9c5db199SXin Li    dest = urllib.parse.urlsplit(dest_path)
707*9c5db199SXin Li    filenames = []
708*9c5db199SXin Li    if dest.scheme == 'gs':
709*9c5db199SXin Li      prefix = 'upload'
710*9c5db199SXin Li      bucket_name = dest.netloc
711*9c5db199SXin Li      object_name = dest.path.lstrip('/')
712*9c5db199SXin Li      filenames.append(
713*9c5db199SXin Li          re.sub(r'[/\\]', '_', 'resumable_upload__%s__%s__%s.url' %
714*9c5db199SXin Li                 (bucket_name, object_name, GSContext.GSUTIL_API_SELECTOR)))
715*9c5db199SXin Li    else:
716*9c5db199SXin Li      prefix = 'download'
717*9c5db199SXin Li      filenames.append(
718*9c5db199SXin Li          re.sub(r'[/\\]', '_', 'resumable_download__%s__%s.etag' %
719*9c5db199SXin Li                 (dest.path, GSContext.GSUTIL_API_SELECTOR)))
720*9c5db199SXin Li
721*9c5db199SXin Li    hashed_filenames = []
722*9c5db199SXin Li    for filename in filenames:
723*9c5db199SXin Li      m = hashlib.sha1(filename.encode())
724*9c5db199SXin Li      hashed_filenames.append('%s_TRACKER_%s.%s' %
725*9c5db199SXin Li                              (prefix, m.hexdigest(), filename[-16:]))
726*9c5db199SXin Li
727*9c5db199SXin Li    return hashed_filenames
728*9c5db199SXin Li
729*9c5db199SXin Li  def _RetryFilter(self, e):
730*9c5db199SXin Li    """Returns whether to retry RunCommandError exception |e|.
731*9c5db199SXin Li
732*9c5db199SXin Li    Args:
733*9c5db199SXin Li      e: Exception object to filter. Exception may be re-raised as
734*9c5db199SXin Li         as different type, if _RetryFilter determines a more appropriate
735*9c5db199SXin Li         exception type based on the contents of |e|.
736*9c5db199SXin Li    """
737*9c5db199SXin Li    error_details = self._MatchKnownError(e)
738*9c5db199SXin Li    if error_details.exception:
739*9c5db199SXin Li      raise error_details.exception
740*9c5db199SXin Li    return error_details.retriable
741*9c5db199SXin Li
742*9c5db199SXin Li  def _MatchKnownError(self, e):
743*9c5db199SXin Li    """Function to match known RunCommandError exceptions.
744*9c5db199SXin Li
745*9c5db199SXin Li    Args:
746*9c5db199SXin Li      e: Exception object to filter.
747*9c5db199SXin Li
748*9c5db199SXin Li    Returns:
749*9c5db199SXin Li      An ErrorDetails instance with details about the message pattern found.
750*9c5db199SXin Li    """
751*9c5db199SXin Li    if not retry_util.ShouldRetryCommandCommon(e):
752*9c5db199SXin Li      if not isinstance(e, cros_build_lib.RunCommandError):
753*9c5db199SXin Li        error_type = 'unknown'
754*9c5db199SXin Li      else:
755*9c5db199SXin Li        error_type = 'failed_to_launch'
756*9c5db199SXin Li      return ErrorDetails(type=error_type, retriable=False)
757*9c5db199SXin Li
758*9c5db199SXin Li    # e is guaranteed by above filter to be a RunCommandError
759*9c5db199SXin Li    if e.result.returncode < 0:
760*9c5db199SXin Li      sig_name = signals.StrSignal(-e.result.returncode)
761*9c5db199SXin Li      logging.info('Child process received signal %d; not retrying.', sig_name)
762*9c5db199SXin Li      return ErrorDetails(type='received_signal', message_pattern=sig_name,
763*9c5db199SXin Li                          retriable=False)
764*9c5db199SXin Li
765*9c5db199SXin Li    error = e.result.error
766*9c5db199SXin Li    if error:
767*9c5db199SXin Li      # Since the captured error will use the encoding the user requested,
768*9c5db199SXin Li      # normalize to bytes for testing below.
769*9c5db199SXin Li      if isinstance(error, six.text_type):
770*9c5db199SXin Li        error = error.encode('utf-8')
771*9c5db199SXin Li
772*9c5db199SXin Li      # gsutil usually prints PreconditionException when a precondition fails.
773*9c5db199SXin Li      # It may also print "ResumableUploadAbortException: 412 Precondition
774*9c5db199SXin Li      # Failed", so the logic needs to be a little more general.
775*9c5db199SXin Li      if (b'PreconditionException' in error or
776*9c5db199SXin Li          b'412 Precondition Failed' in error):
777*9c5db199SXin Li        return ErrorDetails(type='precondition_exception', retriable=False,
778*9c5db199SXin Li                            exception=GSContextPreconditionFailed(e))
779*9c5db199SXin Li
780*9c5db199SXin Li      # If the file does not exist, one of the following errors occurs. The
781*9c5db199SXin Li      # "stat" command leaves off the "CommandException: " prefix, but it also
782*9c5db199SXin Li      # outputs to stdout instead of stderr and so will not be caught here
783*9c5db199SXin Li      # regardless.
784*9c5db199SXin Li      if (b'CommandException: No URLs matched' in error or
785*9c5db199SXin Li          b'NotFoundException:' in error or
786*9c5db199SXin Li          b'One or more URLs matched no objects' in error):
787*9c5db199SXin Li        return ErrorDetails(type='no_such_key', retriable=False,
788*9c5db199SXin Li                            exception=GSNoSuchKey(e))
789*9c5db199SXin Li
790*9c5db199SXin Li      logging.warning('GS_ERROR: %s ', error)
791*9c5db199SXin Li
792*9c5db199SXin Li      # Temporary fix: remove the gsutil tracker files so that our retry
793*9c5db199SXin Li      # can hit a different backend. This should be removed after the
794*9c5db199SXin Li      # bug is fixed by the Google Storage team (see crbug.com/308300).
795*9c5db199SXin Li      resumable_error = _FirstSubstring(error, self.RESUMABLE_ERROR_MESSAGE)
796*9c5db199SXin Li      if resumable_error:
797*9c5db199SXin Li        # Only remove the tracker files if we try to upload/download a file.
798*9c5db199SXin Li        if 'cp' in e.result.cmd[:-2]:
799*9c5db199SXin Li          # Assume a command: gsutil [options] cp [options] src_path dest_path
800*9c5db199SXin Li          # dest_path needs to be a fully qualified local path, which is already
801*9c5db199SXin Li          # required for GSContext.Copy().
802*9c5db199SXin Li          tracker_filenames = self.GetTrackerFilenames(e.result.cmd[-1])
803*9c5db199SXin Li          logging.info('Potential list of tracker files: %s',
804*9c5db199SXin Li                       tracker_filenames)
805*9c5db199SXin Li          for tracker_filename in tracker_filenames:
806*9c5db199SXin Li            tracker_file_path = os.path.join(self.DEFAULT_GSUTIL_TRACKER_DIR,
807*9c5db199SXin Li                                             tracker_filename)
808*9c5db199SXin Li            if os.path.exists(tracker_file_path):
809*9c5db199SXin Li              logging.info('Deleting gsutil tracker file %s before retrying.',
810*9c5db199SXin Li                           tracker_file_path)
811*9c5db199SXin Li              logging.info('The content of the tracker file: %s',
812*9c5db199SXin Li                           osutils.ReadFile(tracker_file_path))
813*9c5db199SXin Li              osutils.SafeUnlink(tracker_file_path)
814*9c5db199SXin Li        return ErrorDetails(type='resumable',
815*9c5db199SXin Li                            message_pattern=resumable_error.decode('utf-8'),
816*9c5db199SXin Li                            retriable=True)
817*9c5db199SXin Li
818*9c5db199SXin Li      transient_error = _FirstSubstring(error, self.TRANSIENT_ERROR_MESSAGE)
819*9c5db199SXin Li      if transient_error:
820*9c5db199SXin Li        return ErrorDetails(type='transient',
821*9c5db199SXin Li                            message_pattern=transient_error.decode('utf-8'),
822*9c5db199SXin Li                            retriable=True)
823*9c5db199SXin Li
824*9c5db199SXin Li    return ErrorDetails(type='unknown', retriable=False)
825*9c5db199SXin Li
826*9c5db199SXin Li  # TODO(mtennant): Make a private method.
827*9c5db199SXin Li  def DoCommand(self, gsutil_cmd, headers=(), retries=None, version=None,
828*9c5db199SXin Li                parallel=False, **kwargs):
829*9c5db199SXin Li    """Run a gsutil command, suppressing output, and setting retry/sleep.
830*9c5db199SXin Li
831*9c5db199SXin Li    Args:
832*9c5db199SXin Li      gsutil_cmd: The (mostly) constructed gsutil subcommand to run.
833*9c5db199SXin Li      headers: A list of raw headers to pass down.
834*9c5db199SXin Li      parallel: Whether gsutil should enable parallel copy/update of multiple
835*9c5db199SXin Li        files. NOTE: This option causes gsutil to use significantly more
836*9c5db199SXin Li        memory, even if gsutil is only uploading one file.
837*9c5db199SXin Li      retries: How many times to retry this command (defaults to setting given
838*9c5db199SXin Li        at object creation).
839*9c5db199SXin Li      version: If given, the generation; essentially the timestamp of the last
840*9c5db199SXin Li        update.  Note this is not the same as sequence-number; it's
841*9c5db199SXin Li        monotonically increasing bucket wide rather than reset per file.
842*9c5db199SXin Li        The usage of this is if we intend to replace/update only if the version
843*9c5db199SXin Li        is what we expect.  This is useful for distributed reasons- for example,
844*9c5db199SXin Li        to ensure you don't overwrite someone else's creation, a version of
845*9c5db199SXin Li        0 states "only update if no version exists".
846*9c5db199SXin Li
847*9c5db199SXin Li    Returns:
848*9c5db199SXin Li      A RunCommandResult object.
849*9c5db199SXin Li    """
850*9c5db199SXin Li    kwargs = kwargs.copy()
851*9c5db199SXin Li    kwargs.setdefault('stderr', True)
852*9c5db199SXin Li    kwargs.setdefault('encoding', 'utf-8')
853*9c5db199SXin Li
854*9c5db199SXin Li    cmd = [self.gsutil_bin]
855*9c5db199SXin Li    cmd += self.gsutil_flags
856*9c5db199SXin Li    for header in headers:
857*9c5db199SXin Li      cmd += ['-h', header]
858*9c5db199SXin Li    if version is not None:
859*9c5db199SXin Li      cmd += ['-h', 'x-goog-if-generation-match:%d' % int(version)]
860*9c5db199SXin Li
861*9c5db199SXin Li    # Enable parallel copy/update of multiple files if stdin is not to
862*9c5db199SXin Li    # be piped to the command. This does not split a single file into
863*9c5db199SXin Li    # smaller components for upload.
864*9c5db199SXin Li    if parallel and kwargs.get('input') is None:
865*9c5db199SXin Li      cmd += ['-m']
866*9c5db199SXin Li
867*9c5db199SXin Li    cmd.extend(gsutil_cmd)
868*9c5db199SXin Li
869*9c5db199SXin Li    if retries is None:
870*9c5db199SXin Li      retries = self.retries
871*9c5db199SXin Li
872*9c5db199SXin Li    extra_env = kwargs.pop('extra_env', {})
873*9c5db199SXin Li    if self.boto_file and os.path.isfile(self.boto_file):
874*9c5db199SXin Li      extra_env.setdefault('BOTO_CONFIG', self.boto_file)
875*9c5db199SXin Li
876*9c5db199SXin Li    if self.dry_run:
877*9c5db199SXin Li      logging.debug("%s: would've run: %s", self.__class__.__name__,
878*9c5db199SXin Li                    cros_build_lib.CmdToStr(cmd))
879*9c5db199SXin Li    else:
880*9c5db199SXin Li      try:
881*9c5db199SXin Li        return retry_stats.RetryWithStats(retry_stats.GSUTIL,
882*9c5db199SXin Li                                          self._RetryFilter,
883*9c5db199SXin Li                                          retries, cros_build_lib.run,
884*9c5db199SXin Li                                          cmd, sleep=self._sleep_time,
885*9c5db199SXin Li                                          extra_env=extra_env, **kwargs)
886*9c5db199SXin Li      except cros_build_lib.RunCommandError as e:
887*9c5db199SXin Li        raise GSCommandError(e.msg, e.result, e.exception)
888*9c5db199SXin Li
889*9c5db199SXin Li  def Copy(self, src_path, dest_path, acl=None, recursive=False,
890*9c5db199SXin Li           skip_symlinks=True, auto_compress=False, **kwargs):
891*9c5db199SXin Li    """Copy to/from GS bucket.
892*9c5db199SXin Li
893*9c5db199SXin Li    Canned ACL permissions can be specified on the gsutil cp command line.
894*9c5db199SXin Li
895*9c5db199SXin Li    More info:
896*9c5db199SXin Li    https://developers.google.com/storage/docs/accesscontrol#applyacls
897*9c5db199SXin Li
898*9c5db199SXin Li    Args:
899*9c5db199SXin Li      src_path: Fully qualified local path or full gs:// path of the src file.
900*9c5db199SXin Li      dest_path: Fully qualified local path or full gs:// path of the dest
901*9c5db199SXin Li                 file.
902*9c5db199SXin Li      acl: One of the google storage canned_acls to apply.
903*9c5db199SXin Li      recursive: Whether to copy recursively.
904*9c5db199SXin Li      skip_symlinks: Skip symbolic links when copying recursively.
905*9c5db199SXin Li      auto_compress: Automatically compress with gzip when uploading.
906*9c5db199SXin Li
907*9c5db199SXin Li    Returns:
908*9c5db199SXin Li      The generation of the remote file.
909*9c5db199SXin Li
910*9c5db199SXin Li    Raises:
911*9c5db199SXin Li      RunCommandError if the command failed despite retries.
912*9c5db199SXin Li    """
913*9c5db199SXin Li    # -v causes gs://bucket/path#generation to be listed in output.
914*9c5db199SXin Li    cmd = ['cp', '-v']
915*9c5db199SXin Li
916*9c5db199SXin Li    # Certain versions of gsutil (at least 4.3) assume the source of a copy is
917*9c5db199SXin Li    # a directory if the -r option is used. If it's really a file, gsutil will
918*9c5db199SXin Li    # look like it's uploading it but not actually do anything. We'll work
919*9c5db199SXin Li    # around that problem by surpressing the -r flag if we detect the source
920*9c5db199SXin Li    # is a local file.
921*9c5db199SXin Li    if recursive and not os.path.isfile(src_path):
922*9c5db199SXin Li      cmd.append('-r')
923*9c5db199SXin Li      if skip_symlinks:
924*9c5db199SXin Li        cmd.append('-e')
925*9c5db199SXin Li
926*9c5db199SXin Li    if auto_compress:
927*9c5db199SXin Li      cmd.append('-Z')
928*9c5db199SXin Li
929*9c5db199SXin Li    acl = self.acl if acl is None else acl
930*9c5db199SXin Li    if acl is not None:
931*9c5db199SXin Li      cmd += ['-a', acl]
932*9c5db199SXin Li
933*9c5db199SXin Li    with cros_build_lib.ContextManagerStack() as stack:
934*9c5db199SXin Li      # Write the input into a tempfile if possible. This is needed so that
935*9c5db199SXin Li      # gsutil can retry failed requests.  We allow the input to be a string
936*9c5db199SXin Li      # or bytes regardless of the output encoding.
937*9c5db199SXin Li      if src_path == '-' and kwargs.get('input') is not None:
938*9c5db199SXin Li        f = stack.Add(tempfile.NamedTemporaryFile, mode='wb')
939*9c5db199SXin Li        data = kwargs['input']
940*9c5db199SXin Li        if isinstance(data, six.text_type):
941*9c5db199SXin Li          data = data.encode('utf-8')
942*9c5db199SXin Li        f.write(data)
943*9c5db199SXin Li        f.flush()
944*9c5db199SXin Li        del kwargs['input']
945*9c5db199SXin Li        src_path = f.name
946*9c5db199SXin Li
947*9c5db199SXin Li      cmd += ['--', src_path, dest_path]
948*9c5db199SXin Li
949*9c5db199SXin Li      if not (PathIsGs(src_path) or PathIsGs(dest_path)):
950*9c5db199SXin Li        # Don't retry on local copies.
951*9c5db199SXin Li        kwargs.setdefault('retries', 0)
952*9c5db199SXin Li
953*9c5db199SXin Li      kwargs['capture_output'] = True
954*9c5db199SXin Li      try:
955*9c5db199SXin Li        result = self.DoCommand(cmd, **kwargs)
956*9c5db199SXin Li        if self.dry_run:
957*9c5db199SXin Li          return None
958*9c5db199SXin Li
959*9c5db199SXin Li        # Now we parse the output for the current generation number.  Example:
960*9c5db199SXin Li        #   Created: gs://chromeos-throw-away-bucket/foo#1360630664537000.1
961*9c5db199SXin Li        m = re.search(r'Created: .*#(\d+)([.](\d+))?\n', result.error)
962*9c5db199SXin Li        if m:
963*9c5db199SXin Li          return int(m.group(1))
964*9c5db199SXin Li        else:
965*9c5db199SXin Li          return None
966*9c5db199SXin Li      except GSNoSuchKey as e:
967*9c5db199SXin Li        # If the source was a local file, the error is a quirk of gsutil 4.5
968*9c5db199SXin Li        # and should be ignored. If the source was remote, there might
969*9c5db199SXin Li        # legitimately be no such file. See crbug.com/393419.
970*9c5db199SXin Li        if os.path.isfile(src_path):
971*9c5db199SXin Li          return None
972*9c5db199SXin Li
973*9c5db199SXin Li        # Temp log for crbug.com/642986, should be removed when the bug
974*9c5db199SXin Li        # is fixed.
975*9c5db199SXin Li        logging.warning('Copy Error: src %s dest %s: %s '
976*9c5db199SXin Li                        '(Temp log for crbug.com/642986)',
977*9c5db199SXin Li                        src_path, dest_path, e)
978*9c5db199SXin Li        raise
979*9c5db199SXin Li
980*9c5db199SXin Li  def CreateWithContents(self, gs_uri, contents, **kwargs):
981*9c5db199SXin Li    """Creates the specified file with specified contents.
982*9c5db199SXin Li
983*9c5db199SXin Li    Args:
984*9c5db199SXin Li      gs_uri: The URI of a file on Google Storage.
985*9c5db199SXin Li      contents: String or bytes with contents to write to the file.
986*9c5db199SXin Li      kwargs: See additional options that Copy takes.
987*9c5db199SXin Li
988*9c5db199SXin Li    Raises:
989*9c5db199SXin Li      See Copy.
990*9c5db199SXin Li    """
991*9c5db199SXin Li    self.Copy('-', gs_uri, input=contents, **kwargs)
992*9c5db199SXin Li
993*9c5db199SXin Li  # TODO: Merge LS() and List()?
994*9c5db199SXin Li  def LS(self, path, **kwargs):
995*9c5db199SXin Li    """Does a directory listing of the given gs path.
996*9c5db199SXin Li
997*9c5db199SXin Li    Args:
998*9c5db199SXin Li      path: The path to get a listing of.
999*9c5db199SXin Li      kwargs: See options that DoCommand takes.
1000*9c5db199SXin Li
1001*9c5db199SXin Li    Returns:
1002*9c5db199SXin Li      A list of paths that matched |path|.  Might be more than one if a
1003*9c5db199SXin Li      directory or path include wildcards/etc...
1004*9c5db199SXin Li    """
1005*9c5db199SXin Li    if self.dry_run:
1006*9c5db199SXin Li      return []
1007*9c5db199SXin Li
1008*9c5db199SXin Li    if not PathIsGs(path):
1009*9c5db199SXin Li      # gsutil doesn't support listing a local path, so just run 'ls'.
1010*9c5db199SXin Li      kwargs.pop('retries', None)
1011*9c5db199SXin Li      kwargs.pop('headers', None)
1012*9c5db199SXin Li      kwargs['capture_output'] = True
1013*9c5db199SXin Li      kwargs.setdefault('encoding', 'utf-8')
1014*9c5db199SXin Li      result = cros_build_lib.run(['ls', path], **kwargs)
1015*9c5db199SXin Li      return result.output.splitlines()
1016*9c5db199SXin Li    else:
1017*9c5db199SXin Li      return [x.url for x in self.List(path, **kwargs)]
1018*9c5db199SXin Li
1019*9c5db199SXin Li  def List(self, path, details=False, **kwargs):
1020*9c5db199SXin Li    """Does a directory listing of the given gs path.
1021*9c5db199SXin Li
1022*9c5db199SXin Li    Args:
1023*9c5db199SXin Li      path: The path to get a listing of.
1024*9c5db199SXin Li      details: Whether to include size/timestamp info.
1025*9c5db199SXin Li      kwargs: See options that DoCommand takes.
1026*9c5db199SXin Li
1027*9c5db199SXin Li    Returns:
1028*9c5db199SXin Li      A list of GSListResult objects that matched |path|.  Might be more
1029*9c5db199SXin Li      than one if a directory or path include wildcards/etc...
1030*9c5db199SXin Li    """
1031*9c5db199SXin Li    ret = []
1032*9c5db199SXin Li    if self.dry_run:
1033*9c5db199SXin Li      return ret
1034*9c5db199SXin Li
1035*9c5db199SXin Li    cmd = ['ls']
1036*9c5db199SXin Li    if details:
1037*9c5db199SXin Li      cmd += ['-l']
1038*9c5db199SXin Li    cmd += ['--', path]
1039*9c5db199SXin Li
1040*9c5db199SXin Li    # We always request the extended details as the overhead compared to a plain
1041*9c5db199SXin Li    # listing is negligible.
1042*9c5db199SXin Li    kwargs['stdout'] = True
1043*9c5db199SXin Li    lines = self.DoCommand(cmd, **kwargs).output.splitlines()
1044*9c5db199SXin Li
1045*9c5db199SXin Li    if details:
1046*9c5db199SXin Li      # The last line is expected to be a summary line.  Ignore it.
1047*9c5db199SXin Li      lines = lines[:-1]
1048*9c5db199SXin Li      ls_re = LS_LA_RE
1049*9c5db199SXin Li    else:
1050*9c5db199SXin Li      ls_re = LS_RE
1051*9c5db199SXin Li
1052*9c5db199SXin Li    # Handle optional fields.
1053*9c5db199SXin Li    intify = lambda x: int(x) if x else None
1054*9c5db199SXin Li
1055*9c5db199SXin Li    # Parse out each result and build up the results list.
1056*9c5db199SXin Li    for line in lines:
1057*9c5db199SXin Li      match = ls_re.search(line)
1058*9c5db199SXin Li      if not match:
1059*9c5db199SXin Li        raise GSContextException('unable to parse line: %s' % line)
1060*9c5db199SXin Li      if match.group('creation_time'):
1061*9c5db199SXin Li        timestamp = datetime.datetime.strptime(match.group('creation_time'),
1062*9c5db199SXin Li                                               DATETIME_FORMAT)
1063*9c5db199SXin Li      else:
1064*9c5db199SXin Li        timestamp = None
1065*9c5db199SXin Li
1066*9c5db199SXin Li      ret.append(GSListResult(
1067*9c5db199SXin Li          content_length=intify(match.group('content_length')),
1068*9c5db199SXin Li          creation_time=timestamp,
1069*9c5db199SXin Li          url=match.group('url'),
1070*9c5db199SXin Li          generation=intify(match.group('generation')),
1071*9c5db199SXin Li          metageneration=intify(match.group('metageneration'))))
1072*9c5db199SXin Li
1073*9c5db199SXin Li    return ret
1074*9c5db199SXin Li
1075*9c5db199SXin Li  def GetSize(self, path, **kwargs):
1076*9c5db199SXin Li    """Returns size of a single object (local or GS)."""
1077*9c5db199SXin Li    if not PathIsGs(path):
1078*9c5db199SXin Li      return os.path.getsize(path)
1079*9c5db199SXin Li    else:
1080*9c5db199SXin Li      return self.Stat(path, **kwargs).content_length
1081*9c5db199SXin Li
1082*9c5db199SXin Li  def Move(self, src_path, dest_path, **kwargs):
1083*9c5db199SXin Li    """Move/rename to/from GS bucket.
1084*9c5db199SXin Li
1085*9c5db199SXin Li    Args:
1086*9c5db199SXin Li      src_path: Fully qualified local path or full gs:// path of the src file.
1087*9c5db199SXin Li      dest_path: Fully qualified local path or full gs:// path of the dest file.
1088*9c5db199SXin Li      kwargs: See options that DoCommand takes.
1089*9c5db199SXin Li    """
1090*9c5db199SXin Li    cmd = ['mv', '--', src_path, dest_path]
1091*9c5db199SXin Li    return self.DoCommand(cmd, **kwargs)
1092*9c5db199SXin Li
1093*9c5db199SXin Li  def SetACL(self, upload_url, acl=None, **kwargs):
1094*9c5db199SXin Li    """Set access on a file already in google storage.
1095*9c5db199SXin Li
1096*9c5db199SXin Li    Args:
1097*9c5db199SXin Li      upload_url: gs:// url that will have acl applied to it.
1098*9c5db199SXin Li      acl: An ACL permissions file or canned ACL.
1099*9c5db199SXin Li      kwargs: See options that DoCommand takes.
1100*9c5db199SXin Li    """
1101*9c5db199SXin Li    if acl is None:
1102*9c5db199SXin Li      if not self.acl:
1103*9c5db199SXin Li        raise GSContextException(
1104*9c5db199SXin Li            'SetAcl invoked w/out a specified acl, nor a default acl.')
1105*9c5db199SXin Li      acl = self.acl
1106*9c5db199SXin Li
1107*9c5db199SXin Li    self.DoCommand(['acl', 'set', acl, upload_url], **kwargs)
1108*9c5db199SXin Li
1109*9c5db199SXin Li  def ChangeACL(self, upload_url, acl_args_file=None, acl_args=None, **kwargs):
1110*9c5db199SXin Li    """Change access on a file already in google storage with "acl ch".
1111*9c5db199SXin Li
1112*9c5db199SXin Li    Args:
1113*9c5db199SXin Li      upload_url: gs:// url that will have acl applied to it.
1114*9c5db199SXin Li      acl_args_file: A file with arguments to the gsutil acl ch command. The
1115*9c5db199SXin Li                     arguments can be spread across multiple lines. Comments
1116*9c5db199SXin Li                     start with a # character and extend to the end of the
1117*9c5db199SXin Li                     line. Exactly one of this argument or acl_args must be
1118*9c5db199SXin Li                     set.
1119*9c5db199SXin Li      acl_args: A list of arguments for the gsutil acl ch command. Exactly
1120*9c5db199SXin Li                one of this argument or acl_args must be set.
1121*9c5db199SXin Li      kwargs: See options that DoCommand takes.
1122*9c5db199SXin Li    """
1123*9c5db199SXin Li    if acl_args_file and acl_args:
1124*9c5db199SXin Li      raise GSContextException(
1125*9c5db199SXin Li          'ChangeACL invoked with both acl_args and acl_args set.')
1126*9c5db199SXin Li    if not acl_args_file and not acl_args:
1127*9c5db199SXin Li      raise GSContextException(
1128*9c5db199SXin Li          'ChangeACL invoked with neither acl_args nor acl_args set.')
1129*9c5db199SXin Li
1130*9c5db199SXin Li    if acl_args_file:
1131*9c5db199SXin Li      lines = osutils.ReadFile(acl_args_file).splitlines()
1132*9c5db199SXin Li      # Strip out comments.
1133*9c5db199SXin Li      lines = [x.split('#', 1)[0].strip() for x in lines]
1134*9c5db199SXin Li      acl_args = ' '.join([x for x in lines if x]).split()
1135*9c5db199SXin Li
1136*9c5db199SXin Li    # Some versions of gsutil bubble up precondition failures even when we
1137*9c5db199SXin Li    # didn't request it due to how ACL changes happen internally to gsutil.
1138*9c5db199SXin Li    # https://crbug.com/763450
1139*9c5db199SXin Li    # We keep the retry limit a bit low because DoCommand already has its
1140*9c5db199SXin Li    # own level of retries.
1141*9c5db199SXin Li    retry_util.RetryException(
1142*9c5db199SXin Li        GSContextPreconditionFailed, 3, self.DoCommand,
1143*9c5db199SXin Li        ['acl', 'ch'] + acl_args + [upload_url], **kwargs)
1144*9c5db199SXin Li
1145*9c5db199SXin Li  def Exists(self, path, **kwargs):
1146*9c5db199SXin Li    """Checks whether the given object exists.
1147*9c5db199SXin Li
1148*9c5db199SXin Li    Args:
1149*9c5db199SXin Li      path: Local path or gs:// url to check.
1150*9c5db199SXin Li      kwargs: Flags to pass to DoCommand.
1151*9c5db199SXin Li
1152*9c5db199SXin Li    Returns:
1153*9c5db199SXin Li      True if the path exists; otherwise returns False.
1154*9c5db199SXin Li    """
1155*9c5db199SXin Li    if not PathIsGs(path):
1156*9c5db199SXin Li      return os.path.exists(path)
1157*9c5db199SXin Li
1158*9c5db199SXin Li    try:
1159*9c5db199SXin Li      self.Stat(path, **kwargs)
1160*9c5db199SXin Li    except GSNoSuchKey:
1161*9c5db199SXin Li      return False
1162*9c5db199SXin Li
1163*9c5db199SXin Li    return True
1164*9c5db199SXin Li
1165*9c5db199SXin Li  def Remove(self, path, recursive=False, ignore_missing=False, **kwargs):
1166*9c5db199SXin Li    """Remove the specified file.
1167*9c5db199SXin Li
1168*9c5db199SXin Li    Args:
1169*9c5db199SXin Li      path: Full gs:// url of the file to delete.
1170*9c5db199SXin Li      recursive: Remove recursively starting at path.
1171*9c5db199SXin Li      ignore_missing: Whether to suppress errors about missing files.
1172*9c5db199SXin Li      kwargs: Flags to pass to DoCommand.
1173*9c5db199SXin Li    """
1174*9c5db199SXin Li    cmd = ['rm']
1175*9c5db199SXin Li    if 'recurse' in kwargs:
1176*9c5db199SXin Li      raise TypeError('"recurse" has been renamed to "recursive"')
1177*9c5db199SXin Li    if recursive:
1178*9c5db199SXin Li      cmd.append('-R')
1179*9c5db199SXin Li    cmd.append('--')
1180*9c5db199SXin Li    cmd.append(path)
1181*9c5db199SXin Li    try:
1182*9c5db199SXin Li      self.DoCommand(cmd, **kwargs)
1183*9c5db199SXin Li    except GSNoSuchKey:
1184*9c5db199SXin Li      if not ignore_missing:
1185*9c5db199SXin Li        raise
1186*9c5db199SXin Li
1187*9c5db199SXin Li  def GetGeneration(self, path):
1188*9c5db199SXin Li    """Get the generation and metageneration of the given |path|.
1189*9c5db199SXin Li
1190*9c5db199SXin Li    Returns:
1191*9c5db199SXin Li      A tuple of the generation and metageneration.
1192*9c5db199SXin Li    """
1193*9c5db199SXin Li    try:
1194*9c5db199SXin Li      res = self.Stat(path)
1195*9c5db199SXin Li    except GSNoSuchKey:
1196*9c5db199SXin Li      return 0, 0
1197*9c5db199SXin Li
1198*9c5db199SXin Li    return res.generation, res.metageneration
1199*9c5db199SXin Li
1200*9c5db199SXin Li  def Stat(self, path, **kwargs):
1201*9c5db199SXin Li    """Stat a GS file, and get detailed information.
1202*9c5db199SXin Li
1203*9c5db199SXin Li    Args:
1204*9c5db199SXin Li      path: A GS path for files to Stat. Wildcards are NOT supported.
1205*9c5db199SXin Li      kwargs: Flags to pass to DoCommand.
1206*9c5db199SXin Li
1207*9c5db199SXin Li    Returns:
1208*9c5db199SXin Li      A GSStatResult object with all fields populated.
1209*9c5db199SXin Li
1210*9c5db199SXin Li    Raises:
1211*9c5db199SXin Li      Assorted GSContextException exceptions.
1212*9c5db199SXin Li    """
1213*9c5db199SXin Li    try:
1214*9c5db199SXin Li      res = self.DoCommand(['stat', '--', path], stdout=True, **kwargs)
1215*9c5db199SXin Li    except GSCommandError as e:
1216*9c5db199SXin Li      # Because the 'gsutil stat' command logs errors itself (instead of
1217*9c5db199SXin Li      # raising errors internally like other commands), we have to look
1218*9c5db199SXin Li      # for errors ourselves.  See the related bug report here:
1219*9c5db199SXin Li      # https://github.com/GoogleCloudPlatform/gsutil/issues/288
1220*9c5db199SXin Li      # Example line:
1221*9c5db199SXin Li      # No URLs matched gs://bucket/file
1222*9c5db199SXin Li      if e.result.error and e.result.error.startswith('No URLs matched'):
1223*9c5db199SXin Li        raise GSNoSuchKey('Stat Error: No URLs matched %s.' % path)
1224*9c5db199SXin Li
1225*9c5db199SXin Li      # No idea what this is, so just choke.
1226*9c5db199SXin Li      raise
1227*9c5db199SXin Li
1228*9c5db199SXin Li    # In dryrun mode, DoCommand doesn't return an object, so we need to fake
1229*9c5db199SXin Li    # out the behavior ourselves.
1230*9c5db199SXin Li    if self.dry_run:
1231*9c5db199SXin Li      return GSStatResult(
1232*9c5db199SXin Li          creation_time=datetime.datetime.now(),
1233*9c5db199SXin Li          content_length=0,
1234*9c5db199SXin Li          content_type='application/octet-stream',
1235*9c5db199SXin Li          hash_crc32c='AAAAAA==',
1236*9c5db199SXin Li          hash_md5='',
1237*9c5db199SXin Li          etag='',
1238*9c5db199SXin Li          generation=0,
1239*9c5db199SXin Li          metageneration=0)
1240*9c5db199SXin Li
1241*9c5db199SXin Li    # We expect Stat output like the following. However, the Content-Language
1242*9c5db199SXin Li    # line appears to be optional based on how the file in question was
1243*9c5db199SXin Li    # created.
1244*9c5db199SXin Li    #
1245*9c5db199SXin Li    # gs://bucket/path/file:
1246*9c5db199SXin Li    #     Creation time:      Sat, 23 Aug 2014 06:53:20 GMT
1247*9c5db199SXin Li    #     Content-Language:   en
1248*9c5db199SXin Li    #     Content-Length:     74
1249*9c5db199SXin Li    #     Content-Type:       application/octet-stream
1250*9c5db199SXin Li    #     Hash (crc32c):      BBPMPA==
1251*9c5db199SXin Li    #     Hash (md5):         ms+qSYvgI9SjXn8tW/5UpQ==
1252*9c5db199SXin Li    #     ETag:               CNCgocbmqMACEAE=
1253*9c5db199SXin Li    #     Generation:         1408776800850000
1254*9c5db199SXin Li    #     Metageneration:     1
1255*9c5db199SXin Li
1256*9c5db199SXin Li    if not res.output.startswith('gs://'):
1257*9c5db199SXin Li      raise GSContextException('Unexpected stat output: %s' % res.output)
1258*9c5db199SXin Li
1259*9c5db199SXin Li    def _GetField(name, optional=False):
1260*9c5db199SXin Li      m = re.search(r'%s:\s*(.+)' % re.escape(name), res.output)
1261*9c5db199SXin Li      if m:
1262*9c5db199SXin Li        return m.group(1)
1263*9c5db199SXin Li      elif optional:
1264*9c5db199SXin Li        return None
1265*9c5db199SXin Li      else:
1266*9c5db199SXin Li        raise GSContextException('Field "%s" missing in "%s"' %
1267*9c5db199SXin Li                                 (name, res.output))
1268*9c5db199SXin Li
1269*9c5db199SXin Li    return GSStatResult(
1270*9c5db199SXin Li        creation_time=datetime.datetime.strptime(
1271*9c5db199SXin Li            _GetField('Creation time'), '%a, %d %b %Y %H:%M:%S %Z'),
1272*9c5db199SXin Li        content_length=int(_GetField('Content-Length')),
1273*9c5db199SXin Li        content_type=_GetField('Content-Type'),
1274*9c5db199SXin Li        hash_crc32c=_GetField('Hash (crc32c)'),
1275*9c5db199SXin Li        hash_md5=_GetField('Hash (md5)', optional=True),
1276*9c5db199SXin Li        etag=_GetField('ETag'),
1277*9c5db199SXin Li        generation=int(_GetField('Generation')),
1278*9c5db199SXin Li        metageneration=int(_GetField('Metageneration')))
1279*9c5db199SXin Li
1280*9c5db199SXin Li  def Counter(self, path):
1281*9c5db199SXin Li    """Return a GSCounter object pointing at a |path| in Google Storage.
1282*9c5db199SXin Li
1283*9c5db199SXin Li    Args:
1284*9c5db199SXin Li      path: The path to the counter in Google Storage.
1285*9c5db199SXin Li    """
1286*9c5db199SXin Li    return GSCounter(self, path)
1287*9c5db199SXin Li
1288*9c5db199SXin Li  def WaitForGsPaths(self, paths, timeout, period=10):
1289*9c5db199SXin Li    """Wait until a list of files exist in GS.
1290*9c5db199SXin Li
1291*9c5db199SXin Li    Args:
1292*9c5db199SXin Li      paths: The list of files to wait for.
1293*9c5db199SXin Li      timeout: Max seconds to wait for file to appear.
1294*9c5db199SXin Li      period: How often to check for files while waiting.
1295*9c5db199SXin Li
1296*9c5db199SXin Li    Raises:
1297*9c5db199SXin Li      timeout_util.TimeoutError if the timeout is reached.
1298*9c5db199SXin Li    """
1299*9c5db199SXin Li    # Copy the list of URIs to wait for, so we don't modify the callers context.
1300*9c5db199SXin Li    pending_paths = paths[:]
1301*9c5db199SXin Li
1302*9c5db199SXin Li    def _CheckForExistence():
1303*9c5db199SXin Li      pending_paths[:] = [x for x in pending_paths if not self.Exists(x)]
1304*9c5db199SXin Li
1305*9c5db199SXin Li    def _Retry(_return_value):
1306*9c5db199SXin Li      # Retry, if there are any pending paths left.
1307*9c5db199SXin Li      return pending_paths
1308*9c5db199SXin Li
1309*9c5db199SXin Li    timeout_util.WaitForSuccess(_Retry, _CheckForExistence,
1310*9c5db199SXin Li                                timeout=timeout, period=period)
1311*9c5db199SXin Li
1312*9c5db199SXin Li  def ContainsWildcard(self, url):
1313*9c5db199SXin Li    """Checks whether url_string contains a wildcard.
1314*9c5db199SXin Li
1315*9c5db199SXin Li    Args:
1316*9c5db199SXin Li      url: URL string to check.
1317*9c5db199SXin Li
1318*9c5db199SXin Li    Returns:
1319*9c5db199SXin Li      True if |url| contains a wildcard.
1320*9c5db199SXin Li    """
1321*9c5db199SXin Li    return bool(WILDCARD_REGEX.search(url))
1322*9c5db199SXin Li
1323*9c5db199SXin Li  def GetGsNamesWithWait(self, pattern, url, timeout=600, period=10,
1324*9c5db199SXin Li                         is_regex_pattern=False):
1325*9c5db199SXin Li    """Returns the google storage names specified by the given pattern.
1326*9c5db199SXin Li
1327*9c5db199SXin Li    This method polls Google Storage until the target files specified by the
1328*9c5db199SXin Li    pattern is available or until the timeout occurs. Because we may not know
1329*9c5db199SXin Li    the exact name of the target files, the method accepts a filename pattern,
1330*9c5db199SXin Li    to identify whether a file whose name matches the pattern exists
1331*9c5db199SXin Li    (e.g. use pattern '*_full_*' to search for the full payload
1332*9c5db199SXin Li    'chromeos_R17-1413.0.0-a1_x86-mario_full_dev.bin'). Returns the name only
1333*9c5db199SXin Li    if found before the timeout.
1334*9c5db199SXin Li
1335*9c5db199SXin Li    Warning: GS listing are not perfect, and are eventually consistent. Doing a
1336*9c5db199SXin Li    search for file existence is a 'best effort'. Calling code should be aware
1337*9c5db199SXin Li    and ready to handle that.
1338*9c5db199SXin Li
1339*9c5db199SXin Li    Args:
1340*9c5db199SXin Li      pattern: a path pattern (glob or regex) identifying the files we need.
1341*9c5db199SXin Li      url: URL of the Google Storage bucket.
1342*9c5db199SXin Li      timeout: how many seconds are we allowed to keep trying.
1343*9c5db199SXin Li      period: how many seconds to wait between attempts.
1344*9c5db199SXin Li      is_regex_pattern: Whether the pattern is a regex (otherwise a glob).
1345*9c5db199SXin Li
1346*9c5db199SXin Li    Returns:
1347*9c5db199SXin Li      The list of files matching the pattern in Google Storage bucket or None
1348*9c5db199SXin Li      if the files are not found and hit the timeout_util.TimeoutError.
1349*9c5db199SXin Li    """
1350*9c5db199SXin Li    def _GetGsName():
1351*9c5db199SXin Li      uploaded_list = [os.path.basename(p.url) for p in self.List(url)]
1352*9c5db199SXin Li
1353*9c5db199SXin Li      if is_regex_pattern:
1354*9c5db199SXin Li        filter_re = re.compile(pattern)
1355*9c5db199SXin Li        matching_names = [f for f in uploaded_list
1356*9c5db199SXin Li                          if filter_re.search(f) is not None]
1357*9c5db199SXin Li      else:
1358*9c5db199SXin Li        matching_names = fnmatch.filter(uploaded_list, pattern)
1359*9c5db199SXin Li
1360*9c5db199SXin Li      return matching_names
1361*9c5db199SXin Li
1362*9c5db199SXin Li    try:
1363*9c5db199SXin Li      matching_names = None
1364*9c5db199SXin Li      if not (is_regex_pattern or self.ContainsWildcard(pattern)):
1365*9c5db199SXin Li        try:
1366*9c5db199SXin Li          self.WaitForGsPaths(['%s/%s' % (url, pattern)], timeout)
1367*9c5db199SXin Li          return [os.path.basename(pattern)]
1368*9c5db199SXin Li        except GSCommandError:
1369*9c5db199SXin Li          pass
1370*9c5db199SXin Li
1371*9c5db199SXin Li      if not matching_names:
1372*9c5db199SXin Li        matching_names = timeout_util.WaitForSuccess(
1373*9c5db199SXin Li            lambda x: not x, _GetGsName, timeout=timeout, period=period)
1374*9c5db199SXin Li
1375*9c5db199SXin Li      logging.debug('matching_names=%s, is_regex_pattern=%r',
1376*9c5db199SXin Li                    matching_names, is_regex_pattern)
1377*9c5db199SXin Li      return matching_names
1378*9c5db199SXin Li    except timeout_util.TimeoutError:
1379*9c5db199SXin Li      return None
1380*9c5db199SXin Li
1381*9c5db199SXin Li
1382*9c5db199SXin Lidef _FirstMatch(predicate, elems):
1383*9c5db199SXin Li  """Returns the first element matching the given |predicate|.
1384*9c5db199SXin Li
1385*9c5db199SXin Li  Args:
1386*9c5db199SXin Li    predicate: A function which takes an element and returns a bool
1387*9c5db199SXin Li    elems: A sequence of elements.
1388*9c5db199SXin Li  """
1389*9c5db199SXin Li  matches = [x for x in elems if predicate(x)]
1390*9c5db199SXin Li  return matches[0] if matches else None
1391*9c5db199SXin Li
1392*9c5db199SXin Li
def _FirstSubstring(superstring, haystack):
  """Finds the first entry of |haystack| that occurs inside |superstring|.

  Args:
    superstring: A string to search for substrings of.
    haystack: A sequence of strings to search through.

  Returns:
    Whatever _FirstMatch returns for the containment test.
  """
  def _IsContained(candidate):
    return candidate in superstring

  return _FirstMatch(_IsContained, haystack)
1401*9c5db199SXin Li
1402*9c5db199SXin Li
@contextlib.contextmanager
def TemporaryURL(prefix):
  """Context manager that yields a randomly named URL in the trash bucket.

  The URL is removed (recursively, ignoring missing files) both before it is
  yielded and again when the context exits.

  Args:
    prefix: Path component placed directly under chromite-temp/.
  """
  bucket = constants.TRASH_BUCKET
  user = getpass.getuser()
  token = cros_build_lib.GetRandomString()
  url = '%s/chromite-temp/%s/%s/%s' % (bucket, prefix, user, token)

  ctx = GSContext()

  def _Purge():
    ctx.Remove(url, ignore_missing=True, recursive=True)

  # Start from a clean slate, and clean up again however the body exits.
  _Purge()
  try:
    yield url
  finally:
    _Purge()
1418