# Copyright 2017 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Skylib module containing file path manipulation functions.

NOTE: The functions in this module currently only support paths with Unix-style
path separators (forward slash, "/"); they do not handle Windows-style paths
with backslash separators. The only drive-letter awareness is in
`paths.is_absolute` (and therefore `paths.join`), which treats a path whose
second character is a colon as absolute.
"""

def _basename(p):
    """Returns the basename (i.e., the file portion) of a path.

    Note that if `p` ends with a slash, this function returns an empty string.
    This matches the behavior of Python's `os.path.basename`, but differs from
    the Unix `basename` command (which would return the path segment preceding
    the final slash).

    Args:
      p: The path whose basename should be returned.

    Returns:
      The basename of the path, which includes the extension.
    """
    return p.rpartition("/")[-1]

def _dirname(p):
    """Returns the dirname of a path.

    The dirname is the portion of `p` up to but not including the file portion
    (i.e., the basename). Any slashes immediately preceding the basename are not
    included, unless omitting them would make the dirname empty.

    Args:
      p: The path whose dirname should be returned.

    Returns:
      The dirname of the path.
    """
    prefix, sep, _ = p.rpartition("/")
    if not prefix:
        # Either there was no slash at all (sep == "") or the only slash is the
        # leading one (sep == "/"); in both cases `sep` is the right answer.
        return sep
    else:
        # If there are multiple consecutive slashes, strip them all out as Python's
        # os.path.dirname does.
        return prefix.rstrip("/")

def _is_absolute(path):
    """Returns `True` if `path` is an absolute path.

    A path is considered absolute if it starts with a slash, or if it is longer
    than two characters and its second character is a colon (e.g. `C:/foo`).

    Args:
      path: A path (which is a string).

    Returns:
      `True` if `path` is an absolute path.
    """
    return path.startswith("/") or (len(path) > 2 and path[1] == ":")

def _join(path, *others):
    """Joins one or more path components intelligently.

    This function mimics the behavior of Python's `os.path.join` function on POSIX
    platform. It returns the concatenation of `path` and any members of `others`,
    inserting directory separators before each component except the first. The
    separator is not inserted if the path up until that point is either empty or
    already ends in a separator.

    If any component is an absolute path, all previous components are discarded.

    Args:
      path: A path segment.
      *others: Additional path segments.

    Returns:
      A string containing the joined paths.
    """
    result = path

    for p in others:
        if _is_absolute(p):
            # An absolute component resets everything accumulated so far.
            result = p
        elif not result or result.endswith("/"):
            result += p
        else:
            result += "/" + p

    return result

def _normalize(path):
    """Normalizes a path, eliminating double slashes and other redundant segments.

    This function mimics the behavior of Python's `os.path.normpath` function on
    POSIX platforms; specifically:

    - If the entire path is empty, "." is returned.
    - All "." segments are removed, unless the path consists solely of a single
      "." segment.
    - Trailing slashes are removed, unless the path consists solely of slashes.
    - ".." segments are removed as long as there are corresponding segments
      earlier in the path to remove; otherwise, they are retained as leading ".."
      segments.
    - Single and double leading slashes are preserved, but three or more leading
      slashes are collapsed into a single leading slash.
    - Multiple adjacent internal slashes are collapsed into a single slash.

    Args:
      path: A path.

    Returns:
      The normalized path.
    """
    if not path:
        return "."

    if path.startswith("//") and not path.startswith("///"):
        initial_slashes = 2
    elif path.startswith("/"):
        initial_slashes = 1
    else:
        initial_slashes = 0
    is_relative = (initial_slashes == 0)

    components = path.split("/")
    new_components = []

    for component in components:
        if component in ("", "."):
            continue
        if component == "..":
            if new_components and new_components[-1] != "..":
                # Only pop the last segment if it isn't another "..".
                new_components.pop()
            elif is_relative:
                # Preserve leading ".." segments for relative paths.
                new_components.append(component)
        else:
            new_components.append(component)

    path = "/".join(new_components)
    if not is_relative:
        path = ("/" * initial_slashes) + path

    return path or "."

# States of the scanner used by `_is_normalized`.
_BASE = 0  # Inside an ordinary segment.
_SEPARATOR = 1  # At the start of the path or just after a "/".
_DOT = 2  # The current segment so far is exactly ".".
_DOTDOT = 3  # The current segment so far is exactly "..".

def _is_normalized(str, look_for_same_level_references = True):
    """Returns true if the passed path doesn't contain uplevel references "..".

    Also checks for single-dot references "." if look_for_same_level_references
    is `True`.

    Args:
      str: The path string to check.
      look_for_same_level_references: If True checks if path doesn't contain
        uplevel references ".." or single-dot references ".".

    Returns:
      True if the path is normalized, False otherwise.
    """

    # The start of the string behaves like the position right after a "/".
    state = _SEPARATOR
    for c in str.elems():
        is_separator = (c == "/")

        if state == _BASE:
            if is_separator:
                state = _SEPARATOR
        elif state == _SEPARATOR:
            if is_separator:
                state = _SEPARATOR
            elif c == ".":
                state = _DOT
            else:
                state = _BASE
        elif state == _DOT:
            if is_separator:
                if look_for_same_level_references:
                    # "." segment found.
                    return False
                state = _SEPARATOR
            elif c == ".":
                state = _DOTDOT
            else:
                state = _BASE
        elif state == _DOTDOT:
            if is_separator:
                # ".." segment found.
                return False
            else:
                state = _BASE

    # Handle a "." or ".." segment that ends the string without a trailing "/".
    if state == _DOT:
        if look_for_same_level_references:
            # "." segment found.
            return False
    elif state == _DOTDOT:
        return False
    return True

def _relativize(path, start):
    """Returns the portion of `path` that is relative to `start`.

    Because we do not have access to the underlying file system, this
    implementation differs slightly from Python's `os.path.relpath` in that it
    will fail if `path` is not beneath `start` (rather than use parent segments to
    walk up to the common file system root).

    Relativizing paths that start with parent directory references only works if
    both paths start with the same initial parent references.

    If `path` and `start` refer to the same location, "." is returned.

    Args:
      path: The path to relativize.
      start: The ancestor path against which to relativize.

    Returns:
      The portion of `path` that is relative to `start`.
    """
    segments = _normalize(path).split("/")
    start_segments = _normalize(start).split("/")
    if start_segments == ["."]:
        # `start` is the current directory, so it contributes no segments.
        start_segments = []
    elif start_segments[-1] == "":
        # `start` normalized to a bare root ("/" or "//"). Splitting it leaves a
        # trailing empty segment that would never match a real path segment, so
        # drop it; otherwise nothing could ever be relative to the root.
        start_segments = start_segments[:-1]
    start_length = len(start_segments)

    if (path.startswith("/") != start.startswith("/") or
        len(segments) < start_length):
        fail("Path '%s' is not beneath '%s'" % (path, start))

    for ancestor_segment, segment in zip(start_segments, segments):
        if ancestor_segment != segment:
            fail("Path '%s' is not beneath '%s'" % (path, start))

    # Slice from the front: a negative-index slice such as `segments[-0:]` would
    # return the whole list when `path` and `start` are the same. Empty segments
    # can only be left over from a leading root and are not part of the result.
    result_segments = [s for s in segments[start_length:] if s]
    return "/".join(result_segments) or "."

def _replace_extension(p, new_extension):
    """Replaces the extension of the file at the end of a path.

    If the path has no extension, the new extension is added to it.

    Args:
      p: The path whose extension should be replaced.
      new_extension: The new extension for the file. The new extension should
        begin with a dot if you want the new filename to have one.

    Returns:
      The path with the extension replaced (or added, if it did not have one).
    """
    return _split_extension(p)[0] + new_extension

def _split_extension(p):
    """Splits the path `p` into a tuple containing the root and extension.

    A leading period on the basename is ignored, so
    `path.split_extension(".bashrc")` returns `(".bashrc", "")`.

    Args:
      p: The path whose root and extension should be split.

    Returns:
      A tuple `(root, ext)` such that the root is the path without the file
      extension, and `ext` is the file extension (which, if non-empty, contains
      the leading dot). The returned tuple always satisfies the relationship
      `root + ext == p`.
    """
    b = _basename(p)
    last_dot_in_basename = b.rfind(".")

    # If there is no dot or the only dot in the basename is at the front, then
    # there is no extension.
    if last_dot_in_basename <= 0:
        return (p, "")

    # The basename is a suffix of `p`, so the extension can be cut off `p` by
    # counting from the end.
    dot_distance_from_end = len(b) - last_dot_in_basename
    return (p[:-dot_distance_from_end], p[-dot_distance_from_end:])

def _starts_with(path_a, path_b):
    """Returns True if and only if path_b is an ancestor of path_a.

    Both paths are normalized before comparison, and a path is considered to
    start with itself. Does not handle OS dependent case-insensitivity.

    Args:
      path_a: The path that is checked for being beneath `path_b`.
      path_b: The candidate ancestor path.

    Returns:
      True if `path_a` is equal to or beneath `path_b`, False otherwise.
    """
    if not path_b:
        # all paths start with the empty string
        return True
    norm_a = _normalize(path_a)
    norm_b = _normalize(path_b)
    if len(norm_b) > len(norm_a):
        return False
    if not norm_a.startswith(norm_b):
        return False
    if len(norm_a) == len(norm_b) or norm_b.endswith("/"):
        # Either the paths are identical, or `norm_b` is a root ("/" or "//"),
        # which already ends in the separator that delimits the next segment.
        return True

    # The prefix must end on a segment boundary ("/foo" vs. "/foobar").
    return norm_a[len(norm_b)] == "/"

paths = struct(
    basename = _basename,
    dirname = _dirname,
    is_absolute = _is_absolute,
    join = _join,
    normalize = _normalize,
    is_normalized = _is_normalized,
    relativize = _relativize,
    replace_extension = _replace_extension,
    split_extension = _split_extension,
    starts_with = _starts_with,
)