1*9c5db199SXin Li# -*- coding: utf-8 -*- 2*9c5db199SXin Li# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be 4*9c5db199SXin Li# found in the LICENSE file. 5*9c5db199SXin Li 6*9c5db199SXin Li"""Contains on-disk caching functionality.""" 7*9c5db199SXin Li 8*9c5db199SXin Lifrom __future__ import print_function 9*9c5db199SXin Li 10*9c5db199SXin Liimport datetime 11*9c5db199SXin Liimport errno 12*9c5db199SXin Liimport os 13*9c5db199SXin Liimport shutil 14*9c5db199SXin Liimport tempfile 15*9c5db199SXin Li 16*9c5db199SXin Lifrom six.moves import urllib 17*9c5db199SXin Li 18*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cros_build_lib 19*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import cros_logging as logging 20*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import locking 21*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import osutils 22*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import retry_util 23*9c5db199SXin Li 24*9c5db199SXin Li 25*9c5db199SXin Li# pylint: disable=protected-access 26*9c5db199SXin Li 27*9c5db199SXin Li 28*9c5db199SXin Lidef EntryLock(f): 29*9c5db199SXin Li """Decorator that provides monitor access control.""" 30*9c5db199SXin Li 31*9c5db199SXin Li def new_f(self, *args, **kwargs): 32*9c5db199SXin Li # Ensure we don't have a read lock before potentially blocking while trying 33*9c5db199SXin Li # to access the monitor. 34*9c5db199SXin Li if self.read_locked: 35*9c5db199SXin Li raise AssertionError( 36*9c5db199SXin Li 'Cannot call %s while holding a read lock.' % f.__name__) 37*9c5db199SXin Li 38*9c5db199SXin Li with self._entry_lock: 39*9c5db199SXin Li self._entry_lock.write_lock() 40*9c5db199SXin Li return f(self, *args, **kwargs) 41*9c5db199SXin Li return new_f 42*9c5db199SXin Li 43*9c5db199SXin Li 44*9c5db199SXin Lidef WriteLock(f): 45*9c5db199SXin Li """Decorator that takes a write lock.""" 46*9c5db199SXin Li 47*9c5db199SXin Li def new_f(self, *args, **kwargs): 48*9c5db199SXin Li with self._lock.write_lock(): 49*9c5db199SXin Li return f(self, *args, **kwargs) 50*9c5db199SXin Li return new_f 51*9c5db199SXin Li 52*9c5db199SXin Li 53*9c5db199SXin Liclass CacheReference(object): 54*9c5db199SXin Li """Encapsulates operations on a cache key reference. 55*9c5db199SXin Li 56*9c5db199SXin Li CacheReferences are returned by the DiskCache.Lookup() function. They are 57*9c5db199SXin Li used to read from and insert into the cache. 58*9c5db199SXin Li 59*9c5db199SXin Li A typical example of using a CacheReference: 60*9c5db199SXin Li 61*9c5db199SXin Li @contextlib.contextmanager 62*9c5db199SXin Li def FetchFromCache() 63*9c5db199SXin Li with cache.Lookup(key) as ref: 64*9c5db199SXin Li # If entry doesn't exist in cache already, generate it ourselves, and 65*9c5db199SXin Li # insert it into the cache, acquiring a read lock on it in the process. 66*9c5db199SXin Li # If the entry does exist, we grab a read lock on it. 67*9c5db199SXin Li if not ref.Exists(lock=True): 68*9c5db199SXin Li path = PrepareItem() 69*9c5db199SXin Li ref.SetDefault(path, lock=True) 70*9c5db199SXin Li 71*9c5db199SXin Li # yield the path to the cached entry to consuming code. 72*9c5db199SXin Li yield ref.path 73*9c5db199SXin Li """ 74*9c5db199SXin Li 75*9c5db199SXin Li def __init__(self, cache, key): 76*9c5db199SXin Li self._cache = cache 77*9c5db199SXin Li self.key = key 78*9c5db199SXin Li self.acquired = False 79*9c5db199SXin Li self.read_locked = False 80*9c5db199SXin Li self._lock = cache._LockForKey(key) 81*9c5db199SXin Li self._entry_lock = cache._LockForKey(key, suffix='.entry_lock') 82*9c5db199SXin Li 83*9c5db199SXin Li @property 84*9c5db199SXin Li def path(self): 85*9c5db199SXin Li """Returns on-disk path to the cached item.""" 86*9c5db199SXin Li return self._cache.GetKeyPath(self.key) 87*9c5db199SXin Li 88*9c5db199SXin Li def Acquire(self): 89*9c5db199SXin Li """Prepare the cache reference for operation. 90*9c5db199SXin Li 91*9c5db199SXin Li This must be called (either explicitly or through entering a 'with' 92*9c5db199SXin Li context) before calling any methods that acquire locks, or mutates 93*9c5db199SXin Li reference. 94*9c5db199SXin Li """ 95*9c5db199SXin Li if self.acquired: 96*9c5db199SXin Li raise AssertionError( 97*9c5db199SXin Li 'Attempting to acquire an already acquired reference.') 98*9c5db199SXin Li 99*9c5db199SXin Li self.acquired = True 100*9c5db199SXin Li self._lock.__enter__() 101*9c5db199SXin Li 102*9c5db199SXin Li def Release(self): 103*9c5db199SXin Li """Release the cache reference. Causes any held locks to be released.""" 104*9c5db199SXin Li if not self.acquired: 105*9c5db199SXin Li raise AssertionError( 106*9c5db199SXin Li 'Attempting to release an unacquired reference.') 107*9c5db199SXin Li 108*9c5db199SXin Li self.acquired = False 109*9c5db199SXin Li self._lock.__exit__(None, None, None) 110*9c5db199SXin Li self.read_locked = False 111*9c5db199SXin Li 112*9c5db199SXin Li def __enter__(self): 113*9c5db199SXin Li self.Acquire() 114*9c5db199SXin Li return self 115*9c5db199SXin Li 116*9c5db199SXin Li def __exit__(self, *args): 117*9c5db199SXin Li self.Release() 118*9c5db199SXin Li 119*9c5db199SXin Li def _ReadLock(self): 120*9c5db199SXin Li self._lock.read_lock() 121*9c5db199SXin Li self.read_locked = True 122*9c5db199SXin Li 123*9c5db199SXin Li @WriteLock 124*9c5db199SXin Li def _Assign(self, path): 125*9c5db199SXin Li self._cache._Insert(self.key, path) 126*9c5db199SXin Li 127*9c5db199SXin Li @WriteLock 128*9c5db199SXin Li def _AssignText(self, text): 129*9c5db199SXin Li self._cache._InsertText(self.key, text) 130*9c5db199SXin Li 131*9c5db199SXin Li @WriteLock 132*9c5db199SXin Li def _Remove(self): 133*9c5db199SXin Li self._cache._Remove(self.key) 134*9c5db199SXin Li osutils.SafeUnlink(self._lock.path) 135*9c5db199SXin Li osutils.SafeUnlink(self._entry_lock.path) 136*9c5db199SXin Li 137*9c5db199SXin Li def _Exists(self): 138*9c5db199SXin Li return self._cache._KeyExists(self.key) 139*9c5db199SXin Li 140*9c5db199SXin Li @EntryLock 141*9c5db199SXin Li def Assign(self, path): 142*9c5db199SXin Li """Insert a file or a directory into the cache at the referenced key.""" 143*9c5db199SXin Li self._Assign(path) 144*9c5db199SXin Li 145*9c5db199SXin Li @EntryLock 146*9c5db199SXin Li def AssignText(self, text): 147*9c5db199SXin Li """Create a file containing |text| and assign it to the key. 148*9c5db199SXin Li 149*9c5db199SXin Li Args: 150*9c5db199SXin Li text: Can be a string or an iterable. 151*9c5db199SXin Li """ 152*9c5db199SXin Li self._AssignText(text) 153*9c5db199SXin Li 154*9c5db199SXin Li @EntryLock 155*9c5db199SXin Li def Remove(self): 156*9c5db199SXin Li """Removes the entry from the cache.""" 157*9c5db199SXin Li self._Remove() 158*9c5db199SXin Li 159*9c5db199SXin Li @EntryLock 160*9c5db199SXin Li def Exists(self, lock=False): 161*9c5db199SXin Li """Tests for existence of entry. 162*9c5db199SXin Li 163*9c5db199SXin Li Args: 164*9c5db199SXin Li lock: If the entry exists, acquire and maintain a read lock on it. 165*9c5db199SXin Li """ 166*9c5db199SXin Li if self._Exists(): 167*9c5db199SXin Li if lock: 168*9c5db199SXin Li self._ReadLock() 169*9c5db199SXin Li return True 170*9c5db199SXin Li return False 171*9c5db199SXin Li 172*9c5db199SXin Li @EntryLock 173*9c5db199SXin Li def SetDefault(self, default_path, lock=False): 174*9c5db199SXin Li """Assigns default_path if the entry doesn't exist. 175*9c5db199SXin Li 176*9c5db199SXin Li Args: 177*9c5db199SXin Li default_path: The path to assign if the entry doesn't exist. 178*9c5db199SXin Li lock: Acquire and maintain a read lock on the entry. 179*9c5db199SXin Li """ 180*9c5db199SXin Li if not self._Exists(): 181*9c5db199SXin Li self._Assign(default_path) 182*9c5db199SXin Li if lock: 183*9c5db199SXin Li self._ReadLock() 184*9c5db199SXin Li 185*9c5db199SXin Li 186*9c5db199SXin Liclass DiskCache(object): 187*9c5db199SXin Li """Locked file system cache keyed by tuples. 188*9c5db199SXin Li 189*9c5db199SXin Li Key entries can be files or directories. Access to the cache is provided 190*9c5db199SXin Li through CacheReferences, which are retrieved by using the cache Lookup() 191*9c5db199SXin Li method. 192*9c5db199SXin Li """ 193*9c5db199SXin Li _STAGING_DIR = 'staging' 194*9c5db199SXin Li 195*9c5db199SXin Li def __init__(self, cache_dir, cache_user=None, lock_suffix='.lock'): 196*9c5db199SXin Li self._cache_dir = cache_dir 197*9c5db199SXin Li self._cache_user = cache_user 198*9c5db199SXin Li self._lock_suffix = lock_suffix 199*9c5db199SXin Li self.staging_dir = os.path.join(cache_dir, self._STAGING_DIR) 200*9c5db199SXin Li 201*9c5db199SXin Li osutils.SafeMakedirsNonRoot(self._cache_dir, user=self._cache_user) 202*9c5db199SXin Li osutils.SafeMakedirsNonRoot(self.staging_dir, user=self._cache_user) 203*9c5db199SXin Li 204*9c5db199SXin Li def _KeyExists(self, key): 205*9c5db199SXin Li return os.path.lexists(self.GetKeyPath(key)) 206*9c5db199SXin Li 207*9c5db199SXin Li def GetKeyPath(self, key): 208*9c5db199SXin Li """Get the on-disk path of a key.""" 209*9c5db199SXin Li return os.path.join(self._cache_dir, '+'.join(key)) 210*9c5db199SXin Li 211*9c5db199SXin Li def _LockForKey(self, key, suffix=None): 212*9c5db199SXin Li """Returns an unacquired lock associated with a key.""" 213*9c5db199SXin Li suffix = suffix or self._lock_suffix 214*9c5db199SXin Li key_path = self.GetKeyPath(key) 215*9c5db199SXin Li osutils.SafeMakedirsNonRoot(os.path.dirname(key_path), 216*9c5db199SXin Li user=self._cache_user) 217*9c5db199SXin Li lock_path = os.path.join(self._cache_dir, os.path.dirname(key_path), 218*9c5db199SXin Li os.path.basename(key_path) + suffix) 219*9c5db199SXin Li return locking.FileLock(lock_path) 220*9c5db199SXin Li 221*9c5db199SXin Li def _TempDirContext(self): 222*9c5db199SXin Li return osutils.TempDir(base_dir=self.staging_dir) 223*9c5db199SXin Li 224*9c5db199SXin Li def _Insert(self, key, path): 225*9c5db199SXin Li """Insert a file or a directory into the cache at a given key.""" 226*9c5db199SXin Li self._Remove(key) 227*9c5db199SXin Li key_path = self.GetKeyPath(key) 228*9c5db199SXin Li osutils.SafeMakedirsNonRoot(os.path.dirname(key_path), 229*9c5db199SXin Li user=self._cache_user) 230*9c5db199SXin Li shutil.move(path, key_path) 231*9c5db199SXin Li 232*9c5db199SXin Li def _InsertText(self, key, text): 233*9c5db199SXin Li """Inserts a file containing |text| into the cache.""" 234*9c5db199SXin Li with self._TempDirContext() as tempdir: 235*9c5db199SXin Li file_path = os.path.join(tempdir, 'tempfile') 236*9c5db199SXin Li osutils.WriteFile(file_path, text) 237*9c5db199SXin Li self._Insert(key, file_path) 238*9c5db199SXin Li 239*9c5db199SXin Li def _Remove(self, key): 240*9c5db199SXin Li """Remove a key from the cache.""" 241*9c5db199SXin Li if self._KeyExists(key): 242*9c5db199SXin Li with self._TempDirContext() as tempdir: 243*9c5db199SXin Li shutil.move(self.GetKeyPath(key), tempdir) 244*9c5db199SXin Li 245*9c5db199SXin Li def GetKey(self, path): 246*9c5db199SXin Li """Returns the key for an item's path in the cache.""" 247*9c5db199SXin Li if self._cache_dir in path: 248*9c5db199SXin Li path = os.path.relpath(path, self._cache_dir) 249*9c5db199SXin Li return tuple(path.split('+')) 250*9c5db199SXin Li 251*9c5db199SXin Li def ListKeys(self): 252*9c5db199SXin Li """Returns a list of keys for every item present in the cache.""" 253*9c5db199SXin Li keys = [] 254*9c5db199SXin Li for root, dirs, files in os.walk(self._cache_dir): 255*9c5db199SXin Li for f in dirs + files: 256*9c5db199SXin Li key_path = os.path.join(root, f) 257*9c5db199SXin Li if os.path.exists(key_path + self._lock_suffix): 258*9c5db199SXin Li # Test for the presence of the key's lock file to determine if this 259*9c5db199SXin Li # is the root key path, or some file nested within a key's dir. 260*9c5db199SXin Li keys.append(self.GetKey(key_path)) 261*9c5db199SXin Li return keys 262*9c5db199SXin Li 263*9c5db199SXin Li def Lookup(self, key): 264*9c5db199SXin Li """Get a reference to a given key.""" 265*9c5db199SXin Li return CacheReference(self, key) 266*9c5db199SXin Li 267*9c5db199SXin Li def DeleteStale(self, max_age): 268*9c5db199SXin Li """Removes any item from the cache that was modified after a given lifetime. 269*9c5db199SXin Li 270*9c5db199SXin Li Args: 271*9c5db199SXin Li max_age: An instance of datetime.timedelta. Any item not modified within 272*9c5db199SXin Li this amount of time will be removed. 273*9c5db199SXin Li 274*9c5db199SXin Li Returns: 275*9c5db199SXin Li List of keys removed. 276*9c5db199SXin Li """ 277*9c5db199SXin Li if not isinstance(max_age, datetime.timedelta): 278*9c5db199SXin Li raise TypeError('max_age must be an instance of datetime.timedelta.') 279*9c5db199SXin Li keys_removed = [] 280*9c5db199SXin Li for key in self.ListKeys(): 281*9c5db199SXin Li path = self.GetKeyPath(key) 282*9c5db199SXin Li mtime = max(os.path.getmtime(path), os.path.getctime(path)) 283*9c5db199SXin Li time_since_last_modify = ( 284*9c5db199SXin Li datetime.datetime.now() - datetime.datetime.fromtimestamp(mtime)) 285*9c5db199SXin Li if time_since_last_modify > max_age: 286*9c5db199SXin Li self.Lookup(key).Remove() 287*9c5db199SXin Li keys_removed.append(key) 288*9c5db199SXin Li return keys_removed 289*9c5db199SXin Li 290*9c5db199SXin Li 291*9c5db199SXin Liclass RemoteCache(DiskCache): 292*9c5db199SXin Li """Supports caching of remote objects via URI.""" 293*9c5db199SXin Li 294*9c5db199SXin Li def _Fetch(self, url, local_path): 295*9c5db199SXin Li """Fetch a remote file.""" 296*9c5db199SXin Li # We have to nest the import because gs.GSContext uses us to cache its own 297*9c5db199SXin Li # gsutil tarball. We know we won't get into a recursive loop though as it 298*9c5db199SXin Li # only fetches files via non-gs URIs. 299*9c5db199SXin Li from autotest_lib.utils.frozen_chromite.lib import gs 300*9c5db199SXin Li 301*9c5db199SXin Li if gs.PathIsGs(url): 302*9c5db199SXin Li ctx = gs.GSContext() 303*9c5db199SXin Li ctx.Copy(url, local_path) 304*9c5db199SXin Li else: 305*9c5db199SXin Li # Note: unittests assume local_path is at the end. 306*9c5db199SXin Li retry_util.RunCurl(['--fail', url, '-o', local_path], 307*9c5db199SXin Li debug_level=logging.DEBUG, capture_output=True) 308*9c5db199SXin Li 309*9c5db199SXin Li def _Insert(self, key, url): # pylint: disable=arguments-differ 310*9c5db199SXin Li """Insert a remote file into the cache.""" 311*9c5db199SXin Li o = urllib.parse.urlparse(url) 312*9c5db199SXin Li if o.scheme in ('file', ''): 313*9c5db199SXin Li DiskCache._Insert(self, key, o.path) 314*9c5db199SXin Li return 315*9c5db199SXin Li 316*9c5db199SXin Li with tempfile.NamedTemporaryFile(dir=self.staging_dir, 317*9c5db199SXin Li delete=False) as local_path: 318*9c5db199SXin Li self._Fetch(url, local_path.name) 319*9c5db199SXin Li DiskCache._Insert(self, key, local_path.name) 320*9c5db199SXin Li 321*9c5db199SXin Li 322*9c5db199SXin Lidef Untar(path, cwd, sudo=False): 323*9c5db199SXin Li """Untar a tarball.""" 324*9c5db199SXin Li functor = cros_build_lib.sudo_run if sudo else cros_build_lib.run 325*9c5db199SXin Li comp = cros_build_lib.CompressionExtToType(path) 326*9c5db199SXin Li cmd = ['tar'] 327*9c5db199SXin Li if comp != cros_build_lib.COMP_NONE: 328*9c5db199SXin Li cmd += ['-I', cros_build_lib.FindCompressor(comp)] 329*9c5db199SXin Li functor(cmd + ['-xpf', path], cwd=cwd, debug_level=logging.DEBUG, quiet=True) 330*9c5db199SXin Li 331*9c5db199SXin Li 332*9c5db199SXin Liclass TarballCache(RemoteCache): 333*9c5db199SXin Li """Supports caching of extracted tarball contents.""" 334*9c5db199SXin Li 335*9c5db199SXin Li def _Insert(self, key, tarball_path): # pylint: disable=arguments-differ 336*9c5db199SXin Li """Insert a tarball and its extracted contents into the cache. 337*9c5db199SXin Li 338*9c5db199SXin Li Download the tarball first if a URL is provided as tarball_path. 339*9c5db199SXin Li """ 340*9c5db199SXin Li with osutils.TempDir(prefix='tarball-cache', 341*9c5db199SXin Li base_dir=self.staging_dir) as tempdir: 342*9c5db199SXin Li 343*9c5db199SXin Li o = urllib.parse.urlsplit(tarball_path) 344*9c5db199SXin Li if o.scheme == 'file': 345*9c5db199SXin Li tarball_path = o.path 346*9c5db199SXin Li elif o.scheme: 347*9c5db199SXin Li url = tarball_path 348*9c5db199SXin Li tarball_path = os.path.join(tempdir, os.path.basename(o.path)) 349*9c5db199SXin Li self._Fetch(url, tarball_path) 350*9c5db199SXin Li 351*9c5db199SXin Li extract_path = os.path.join(tempdir, 'extract') 352*9c5db199SXin Li os.mkdir(extract_path) 353*9c5db199SXin Li Untar(tarball_path, extract_path) 354*9c5db199SXin Li DiskCache._Insert(self, key, extract_path) 355*9c5db199SXin Li 356*9c5db199SXin Li def _KeyExists(self, key): 357*9c5db199SXin Li """Specialized DiskCache._KeyExits that ignores empty directories. 358*9c5db199SXin Li 359*9c5db199SXin Li The normal _KeyExists just checks to see if the key path exists in the cache 360*9c5db199SXin Li directory. Many tests mock out run then fetch a tarball. The mock 361*9c5db199SXin Li blocks untarring into it. This leaves behind an empty dir which blocks 362*9c5db199SXin Li future untarring in non-test scripts. 363*9c5db199SXin Li 364*9c5db199SXin Li See crbug.com/468838 365*9c5db199SXin Li """ 366*9c5db199SXin Li # Wipe out empty directories before testing for existence. 367*9c5db199SXin Li key_path = self.GetKeyPath(key) 368*9c5db199SXin Li 369*9c5db199SXin Li try: 370*9c5db199SXin Li os.rmdir(key_path) 371*9c5db199SXin Li except OSError as ex: 372*9c5db199SXin Li if ex.errno not in (errno.ENOTEMPTY, errno.ENOENT): 373*9c5db199SXin Li raise 374*9c5db199SXin Li 375*9c5db199SXin Li return os.path.exists(key_path) 376