# xref: /aosp_15_r20/external/autotest/utils/frozen_chromite/lib/cache.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
# -*- coding: utf-8 -*-
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Contains on-disk caching functionality."""
7*9c5db199SXin Li
from __future__ import print_function

import datetime
import errno
import functools
import os
import shutil
import tempfile

from six.moves import urllib

from autotest_lib.utils.frozen_chromite.lib import cros_build_lib
from autotest_lib.utils.frozen_chromite.lib import cros_logging as logging
from autotest_lib.utils.frozen_chromite.lib import locking
from autotest_lib.utils.frozen_chromite.lib import osutils
from autotest_lib.utils.frozen_chromite.lib import retry_util
23*9c5db199SXin Li
24*9c5db199SXin Li
# pylint: disable=protected-access
26*9c5db199SXin Li
27*9c5db199SXin Li
def EntryLock(f):
  """Decorator that provides monitor access control.

  The wrapped method runs inside the instance's |_entry_lock| context, after
  write_lock() has been called on that lock.

  Args:
    f: The method to wrap.  The instance it is bound to must provide the
        |read_locked| and |_entry_lock| attributes.

  Returns:
    A wrapper that keeps the name and docstring of |f|.  The wrapper raises
    AssertionError when it is called while |read_locked| is set.
  """

  # functools.wraps keeps f.__name__/__doc__ visible on the decorated method,
  # so introspection and help() report the real method instead of 'new_f'.
  @functools.wraps(f)
  def new_f(self, *args, **kwargs):
    # Ensure we don't have a read lock before potentially blocking while trying
    # to access the monitor.
    if self.read_locked:
      raise AssertionError(
          'Cannot call %s while holding a read lock.' % f.__name__)

    with self._entry_lock:
      self._entry_lock.write_lock()
      return f(self, *args, **kwargs)
  return new_f
42*9c5db199SXin Li
43*9c5db199SXin Li
def WriteLock(f):
  """Decorator that takes a write lock.

  The wrapped method runs inside the context manager returned by calling
  write_lock() on the instance's |_lock| attribute.

  Args:
    f: The method to wrap.  The instance it is bound to must provide |_lock|.

  Returns:
    A wrapper that keeps the name and docstring of |f|.
  """

  # Preserve the wrapped method's metadata instead of exposing 'new_f'.
  @functools.wraps(f)
  def new_f(self, *args, **kwargs):
    with self._lock.write_lock():
      return f(self, *args, **kwargs)
  return new_f
51*9c5db199SXin Li
52*9c5db199SXin Li
class CacheReference(object):
  """Encapsulates operations on a cache key reference.

  CacheReferences are returned by the DiskCache.Lookup() function.  They are
  used to read from and insert into the cache.

  A typical example of using a CacheReference:

  @contextlib.contextmanager
  def FetchFromCache():
    with cache.Lookup(key) as ref:
      # If the entry doesn't exist in the cache already, generate it
      # ourselves and insert it into the cache, acquiring a read lock on it
      # in the process.  If the entry does exist, we grab a read lock on it.
      if not ref.Exists(lock=True):
        path = PrepareItem()
        ref.SetDefault(path, lock=True)

      # Yield the path to the cached entry to consuming code.
      yield ref.path
  """

  def __init__(self, cache, key):
    """Creates an unacquired reference to |key| in |cache|.

    Args:
      cache: The cache object that owns the key.  It must provide
          _LockForKey(), GetKeyPath(), _Insert(), _InsertText(), _Remove()
          and _KeyExists().
      key: The cache key this reference points at.
    """
    self._cache = cache
    self.key = key
    # True between Acquire() and Release().
    self.acquired = False
    # True once _ReadLock() ran; reset by Release().
    self.read_locked = False
    # Lock guarding the cached item itself.
    self._lock = cache._LockForKey(key)
    # Separate lock used by the EntryLock decorator on the public methods.
    self._entry_lock = cache._LockForKey(key, suffix='.entry_lock')

  @property
  def path(self):
    """Returns on-disk path to the cached item."""
    return self._cache.GetKeyPath(self.key)

  def Acquire(self):
    """Prepare the cache reference for operation.

    This must be called (either explicitly or through entering a 'with'
    context) before calling any methods that acquire locks or mutate the
    reference.

    Raises:
      AssertionError: If the reference is already acquired.
    """
    if self.acquired:
      raise AssertionError(
          'Attempting to acquire an already acquired reference.')

    # Enter the lock before flipping the flag: if entering fails, the
    # reference stays unacquired and can be acquired again later.
    self._lock.__enter__()
    self.acquired = True

  def Release(self):
    """Release the cache reference.  Causes any held locks to be released.

    Raises:
      AssertionError: If the reference is not currently acquired.
    """
    if not self.acquired:
      raise AssertionError(
          'Attempting to release an unacquired reference.')

    self.acquired = False
    try:
      self._lock.__exit__(None, None, None)
    finally:
      # Never leave a stale read_locked flag behind, even if leaving the lock
      # context raised.
      self.read_locked = False

  def __enter__(self):
    self.Acquire()
    return self

  def __exit__(self, *args):
    self.Release()

  def _ReadLock(self):
    """Takes a read lock on |_lock| and records that it is held."""
    self._lock.read_lock()
    self.read_locked = True

  @WriteLock
  def _Assign(self, path):
    """Inserts |path| into the cache at this key under the write lock."""
    self._cache._Insert(self.key, path)

  @WriteLock
  def _AssignText(self, text):
    """Inserts a file containing |text| at this key under the write lock."""
    self._cache._InsertText(self.key, text)

  @WriteLock
  def _Remove(self):
    """Removes the cached item and unlinks both of its lock files."""
    self._cache._Remove(self.key)
    osutils.SafeUnlink(self._lock.path)
    osutils.SafeUnlink(self._entry_lock.path)

  def _Exists(self):
    """Returns whether the owning cache reports this key as present."""
    return self._cache._KeyExists(self.key)

  @EntryLock
  def Assign(self, path):
    """Insert a file or a directory into the cache at the referenced key."""
    self._Assign(path)

  @EntryLock
  def AssignText(self, text):
    """Create a file containing |text| and assign it to the key.

    Args:
      text: Can be a string or an iterable.
    """
    self._AssignText(text)

  @EntryLock
  def Remove(self):
    """Removes the entry from the cache."""
    self._Remove()

  @EntryLock
  def Exists(self, lock=False):
    """Tests for existence of entry.

    Args:
      lock: If the entry exists, acquire and maintain a read lock on it.

    Returns:
      True if the entry exists, False otherwise.
    """
    if self._Exists():
      if lock:
        self._ReadLock()
      return True
    return False

  @EntryLock
  def SetDefault(self, default_path, lock=False):
    """Assigns default_path if the entry doesn't exist.

    Args:
      default_path: The path to assign if the entry doesn't exist.
      lock: Acquire and maintain a read lock on the entry.
    """
    if not self._Exists():
      self._Assign(default_path)
    if lock:
      self._ReadLock()
185*9c5db199SXin Li
class DiskCache(object):
  """Locked file system cache keyed by tuples.

  Key entries can be files or directories.  Access to the cache is provided
  through CacheReferences, which are retrieved by using the cache Lookup()
  method.
  """
  _STAGING_DIR = 'staging'

  def __init__(self, cache_dir, cache_user=None, lock_suffix='.lock'):
    """Creates the cache directory and its staging directory.

    Args:
      cache_dir: Directory that holds the cached items.
      cache_user: Passed as |user| to osutils.SafeMakedirsNonRoot() for every
          directory this cache creates.
      lock_suffix: Suffix appended to a key's path to name its lock file.
    """
    self._cache_dir = cache_dir
    self._cache_user = cache_user
    self._lock_suffix = lock_suffix
    self.staging_dir = os.path.join(cache_dir, self._STAGING_DIR)

    osutils.SafeMakedirsNonRoot(self._cache_dir, user=self._cache_user)
    osutils.SafeMakedirsNonRoot(self.staging_dir, user=self._cache_user)

  def _KeyExists(self, key):
    """Returns whether anything (even a dangling symlink) is at the key path."""
    return os.path.lexists(self.GetKeyPath(key))

  def GetKeyPath(self, key):
    """Get the on-disk path of a key."""
    return os.path.join(self._cache_dir, '+'.join(key))

  def _LockForKey(self, key, suffix=None):
    """Returns an unacquired lock associated with a key.

    Args:
      key: The cache key to build a lock for.
      suffix: Lock file suffix; defaults to the cache's |lock_suffix|.
    """
    suffix = suffix or self._lock_suffix
    key_path = self.GetKeyPath(key)
    key_dir = os.path.dirname(key_path)
    osutils.SafeMakedirsNonRoot(key_dir, user=self._cache_user)
    # key_path already starts with the cache dir.  Joining the cache dir in
    # front of it again is a no-op for absolute paths but duplicates the
    # directory for relative ones ('cache/cache/<key>.lock'), which is neither
    # the directory created above nor the path ListKeys() checks.
    lock_path = os.path.join(key_dir, os.path.basename(key_path) + suffix)
    return locking.FileLock(lock_path)

  def _TempDirContext(self):
    """Returns a temporary directory context rooted in the staging dir."""
    return osutils.TempDir(base_dir=self.staging_dir)

  def _Insert(self, key, path):
    """Insert a file or a directory into the cache at a given key."""
    # Drop any previous item first so the move below lands on a free path.
    self._Remove(key)
    key_path = self.GetKeyPath(key)
    osutils.SafeMakedirsNonRoot(os.path.dirname(key_path),
                                user=self._cache_user)
    shutil.move(path, key_path)

  def _InsertText(self, key, text):
    """Inserts a file containing |text| into the cache."""
    with self._TempDirContext() as tempdir:
      file_path = os.path.join(tempdir, 'tempfile')
      osutils.WriteFile(file_path, text)
      self._Insert(key, file_path)

  def _Remove(self, key):
    """Remove a key from the cache."""
    if self._KeyExists(key):
      # Move the item into a staging temp dir; the item goes away together
      # with that temp dir when the context exits.
      with self._TempDirContext() as tempdir:
        shutil.move(self.GetKeyPath(key), tempdir)

  def GetKey(self, path):
    """Returns the key for an item's path in the cache."""
    # NOTE(review): this is a substring test, not a prefix test; confirm that
    # callers only pass paths located under the cache dir or bare key names.
    if self._cache_dir in path:
      path = os.path.relpath(path, self._cache_dir)
    return tuple(path.split('+'))

  def ListKeys(self):
    """Returns a list of keys for every item present in the cache."""
    keys = []
    for root, dirs, files in os.walk(self._cache_dir):
      for f in dirs + files:
        key_path = os.path.join(root, f)
        if os.path.exists(key_path + self._lock_suffix):
          # Test for the presence of the key's lock file to determine if this
          # is the root key path, or some file nested within a key's dir.
          keys.append(self.GetKey(key_path))
    return keys

  def Lookup(self, key):
    """Get a reference to a given key."""
    return CacheReference(self, key)

  def DeleteStale(self, max_age):
    """Removes every item that has not been modified within |max_age|.

    Args:
      max_age: An instance of datetime.timedelta. Any item not modified within
          this amount of time will be removed.

    Returns:
      List of keys removed.

    Raises:
      TypeError: If |max_age| is not a datetime.timedelta.
    """
    if not isinstance(max_age, datetime.timedelta):
      raise TypeError('max_age must be an instance of datetime.timedelta.')
    keys_removed = []
    for key in self.ListKeys():
      path = self.GetKeyPath(key)
      # Use the most recent of the mtime and ctime stamps as the last change.
      mtime = max(os.path.getmtime(path), os.path.getctime(path))
      time_since_last_modify = (
          datetime.datetime.now() - datetime.datetime.fromtimestamp(mtime))
      if time_since_last_modify > max_age:
        self.Lookup(key).Remove()
        keys_removed.append(key)
    return keys_removed
289*9c5db199SXin Li
290*9c5db199SXin Li
class RemoteCache(DiskCache):
  """Supports caching of remote objects via URI."""

  def _Fetch(self, url, local_path):
    """Fetch a remote file.

    Args:
      url: Location to download from.  URLs that gs.PathIsGs() accepts are
          copied through a GSContext; anything else is downloaded with curl.
      local_path: Path the downloaded content is written to.
    """
    # We have to nest the import because gs.GSContext uses us to cache its own
    # gsutil tarball.  We know we won't get into a recursive loop though as it
    # only fetches files via non-gs URIs.
    from autotest_lib.utils.frozen_chromite.lib import gs

    if gs.PathIsGs(url):
      ctx = gs.GSContext()
      ctx.Copy(url, local_path)
    else:
      # Note: unittests assume local_path is at the end.
      retry_util.RunCurl(['--fail', url, '-o', local_path],
                         debug_level=logging.DEBUG, capture_output=True)

  def _Insert(self, key, url):  # pylint: disable=arguments-differ
    """Insert a remote file into the cache.

    Args:
      key: The cache key to insert at.
      url: Source of the item.  A 'file' URL or a plain path is inserted
          directly; any other URL is downloaded into the staging dir first.
    """
    o = urllib.parse.urlparse(url)
    if o.scheme in ('file', ''):
      DiskCache._Insert(self, key, o.path)
      return

    with tempfile.NamedTemporaryFile(dir=self.staging_dir,
                                     delete=False) as local_path:
      try:
        self._Fetch(url, local_path.name)
        DiskCache._Insert(self, key, local_path.name)
      finally:
        # delete=False means nothing removes the staged file for us.  After a
        # successful insert it has already been moved away; after a failed
        # fetch or insert it would otherwise pile up in the staging dir.
        osutils.SafeUnlink(local_path.name)
320*9c5db199SXin Li
321*9c5db199SXin Li
def Untar(path, cwd, sudo=False):
  """Extract the tarball at |path| by running tar.

  Args:
    path: Tarball to extract; its compression type is derived from it.
    cwd: Directory the tar command is run in.
    sudo: Run tar through cros_build_lib.sudo_run instead of
        cros_build_lib.run.
  """
  if sudo:
    runner = cros_build_lib.sudo_run
  else:
    runner = cros_build_lib.run

  compression = cros_build_lib.CompressionExtToType(path)
  tar_cmd = ['tar']
  if compression != cros_build_lib.COMP_NONE:
    # Compressed archives are unpacked through an explicit external program.
    tar_cmd.extend(['-I', cros_build_lib.FindCompressor(compression)])
  tar_cmd.extend(['-xpf', path])

  runner(tar_cmd, cwd=cwd, debug_level=logging.DEBUG, quiet=True)
330*9c5db199SXin Li
331*9c5db199SXin Li
class TarballCache(RemoteCache):
  """Supports caching of extracted tarball contents."""

  def _Insert(self, key, tarball_path):  # pylint: disable=arguments-differ
    """Insert a tarball and its extracted contents into the cache.

    Download the tarball first if a URL is provided as tarball_path.

    Args:
      key: The cache key to insert at.
      tarball_path: Local path, 'file' URL, or remote URL of the tarball.
    """
    with osutils.TempDir(prefix='tarball-cache',
                         base_dir=self.staging_dir) as tempdir:

      o = urllib.parse.urlsplit(tarball_path)
      if o.scheme == 'file':
        tarball_path = o.path
      elif o.scheme:
        # Any other scheme is remote: download into the temp dir first.
        url = tarball_path
        tarball_path = os.path.join(tempdir, os.path.basename(o.path))
        self._Fetch(url, tarball_path)

      extract_path = os.path.join(tempdir, 'extract')
      os.mkdir(extract_path)
      Untar(tarball_path, extract_path)
      # Only the extracted tree is cached; the tarball itself is not.
      DiskCache._Insert(self, key, extract_path)

  def _KeyExists(self, key):
    """Specialized DiskCache._KeyExists that ignores empty directories.

    The normal _KeyExists just checks to see if the key path exists in the cache
    directory. Many tests mock out run then fetch a tarball. The mock
    blocks untarring into it. This leaves behind an empty dir which blocks
    future untarring in non-test scripts.

    See crbug.com/468838
    """
    # Wipe out empty directories before testing for existence.
    key_path = self.GetKeyPath(key)

    try:
      os.rmdir(key_path)
    except OSError as ex:
      # These errnos only mean "not an empty directory": it is not empty
      # (ENOTEMPTY, or EEXIST on some platforms), it is missing (ENOENT), or
      # it is not a directory at all (ENOTDIR).  Fall through to the plain
      # existence check; anything else is a real failure.
      if ex.errno not in (errno.ENOTEMPTY, errno.EEXIST, errno.ENOENT,
                          errno.ENOTDIR):
        raise

    return os.path.exists(key_path)
376