xref: /aosp_15_r20/external/bazel-skylib/lib/paths.bzl (revision bcb5dc7965af6ee42bf2f21341a2ec00233a8c8a)
1# Copyright 2017 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#    http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
"""Skylib module containing file path manipulation functions.

NOTE: The functions in this module currently only support paths with Unix-style
path separators (forward slash, "/"); they do not handle Windows-style paths
with backslash separators or drive letters. The one exception is
`paths.is_absolute`, which also treats a path whose second character is a colon
(such as "C:/foo") as absolute.
"""
21
def _basename(p):
    """Returns the final component (the file portion) of a path.

    Everything after the last slash is returned, so a path that ends with a
    slash has an empty basename. This is the same result Python's
    `os.path.basename` gives, and is unlike the Unix `basename` command, which
    would report the segment in front of the trailing slash.

    Args:
      p: The path whose basename should be returned.

    Returns:
      The basename of the path, which includes the extension.
    """

    # `rfind` yields -1 when there is no slash, which makes the slice start at
    # index 0 and return the whole string.
    last_slash = p.rfind("/")
    return p[last_slash + 1:]
37
def _dirname(p):
    """Returns the dirname of a path.

    The dirname is the portion of `p` up to but not including the file portion
    (i.e., the basename). Any slashes immediately preceding the basename are not
    included, unless omitting them would make the dirname empty; in that case
    the leading run of slashes is returned unchanged, as Python's
    `os.path.dirname` does (so the dirname of "/foo" is "/" and the dirname of
    "//foo" is "//").

    Args:
      p: The path whose dirname should be returned.

    Returns:
      The dirname of the path. This is the empty string when `p` contains no
      slash.
    """
    prefix, sep, _ = p.rpartition("/")

    # If there are multiple consecutive slashes before the basename, strip them
    # all out as Python's os.path.dirname does.
    stripped = prefix.rstrip("/")
    if stripped:
        return stripped

    # Nothing but slashes (or nothing at all) precedes the basename. Return that
    # slash run as-is so an absolute path such as "//foo" is not turned into
    # the empty (relative) dirname. When `p` has no slash, both pieces are
    # empty and the result is "".
    return prefix + sep
58
def _is_absolute(path):
    """Returns `True` if `path` is an absolute path.

    A path is treated as absolute when it begins with a slash, or when it is
    longer than two characters and its second character is a colon (the shape
    of a drive-letter path such as "C:/foo").

    Args:
      path: A path (which is a string).

    Returns:
      `True` if `path` is an absolute path.
    """
    if path.startswith("/"):
        return True

    # Drive-letter form: at least three characters with ":" in second position.
    return len(path) > 2 and path[1] == ":"
69
def _join(path, *others):
    """Joins one or more path components intelligently.

    The behavior follows Python's `os.path.join` on POSIX platforms: `path` and
    each member of `others` are concatenated in order, with a directory
    separator placed in front of every component after the first. No separator
    is added when the path built so far is empty or already ends in one.

    An absolute component discards everything that was joined before it.

    Args:
      path: A path segment.
      *others: Additional path segments.

    Returns:
      A string containing the joined paths.
    """
    joined = path

    for segment in others:
        if _is_absolute(segment):
            # An absolute component restarts the result from scratch.
            joined = segment
            continue

        # Only add a separator when there is something to separate from and it
        # does not already end in one.
        if joined and not joined.endswith("/"):
            joined += "/"
        joined += segment

    return joined
99
def _normalize(path):
    """Normalizes a path, eliminating double slashes and other redundant segments.

    The result follows the rules of Python's `os.path.normpath` on POSIX
    platforms:

    - An empty path normalizes to ".".
    - "." segments are dropped, unless nothing else remains, in which case the
      result is a single ".".
    - Trailing slashes are dropped, unless the path is made up of slashes only.
    - A ".." segment cancels the segment before it when there is one to cancel.
      Otherwise it is kept as a leading ".." for a relative path and dropped
      for an absolute path.
    - One or two leading slashes are kept as they are; three or more leading
      slashes become a single leading slash.
    - Runs of slashes inside the path become a single slash.

    Args:
      path: A path.

    Returns:
      The normalized path.
    """
    if not path:
        return "."

    # Work out the leading-slash prefix to restore at the end. Exactly two
    # leading slashes are kept; any other non-zero count becomes one.
    if not path.startswith("/"):
        root = ""
    elif path.startswith("//") and not path.startswith("///"):
        root = "//"
    else:
        root = "/"

    kept = []
    for segment in path.split("/"):
        if segment == "" or segment == ".":
            # Empty pieces come from repeated/leading/trailing slashes.
            continue
        if segment != "..":
            kept.append(segment)
        elif kept and kept[-1] != "..":
            # Cancel the previous real segment.
            kept.pop()
        elif not root:
            # Nothing to cancel: a relative path keeps its leading "..", while
            # an absolute path silently drops it.
            kept.append(segment)

    return (root + "/".join(kept)) or "."
155
# States of the character-by-character scanner in `_is_normalized`.
# _BASE: inside a segment already known not to be exactly "." or "..".
_BASE = 0

# _SEPARATOR: at the start of the string or just after a "/".
_SEPARATOR = 1

# _DOT: the current segment consists of exactly "." so far.
_DOT = 2

# _DOTDOT: the current segment consists of exactly ".." so far.
_DOTDOT = 3
160
def _is_normalized(str, look_for_same_level_references = True):
    """Returns True if the passed path contains no uplevel references "..".

    When `look_for_same_level_references` is `True`, single-dot references "."
    are rejected as well. Only whole segments count: a name such as "..." or
    ".foo" is an ordinary segment. Repeated and trailing slashes are not
    checked by this function.

    Args:
      str: The path string to check.
      look_for_same_level_references: If True, the path must contain neither
        uplevel references ".." nor single-dot references ".". If False, only
        uplevel references ".." are rejected.

    Returns:
      True if the path is normalized, False otherwise.
    """

    # Start as though a separator was just read, so that a leading "." or ".."
    # is recognized as a segment of its own.
    state = _SEPARATOR
    for c in str.elems():
        at_slash = c == "/"

        if state == _DOTDOT:
            if at_slash:
                # A complete ".." segment.
                return False
            state = _BASE
        elif state == _DOT:
            if at_slash:
                if look_for_same_level_references:
                    # A complete "." segment.
                    return False
                state = _SEPARATOR
            elif c == ".":
                state = _DOTDOT
            else:
                state = _BASE
        elif at_slash:
            # From either _BASE or _SEPARATOR a slash ends/continues a separator.
            state = _SEPARATOR
        elif state == _SEPARATOR and c == ".":
            state = _DOT
        else:
            state = _BASE

    # The last segment has no slash after it, so judge it by the final state.
    if state == _DOTDOT:
        return False
    return not (state == _DOT and look_for_same_level_references)
216
def _relativize(path, start):
    """Returns the portion of `path` that is relative to `start`.

    Because we do not have access to the underlying file system, this
    implementation differs slightly from Python's `os.path.relpath` in that it
    will fail if `path` is not beneath `start` (rather than use parent segments to
    walk up to the common file system root).

    Relativizing paths that start with parent directory references only works if
    both paths start with the same initial parent references.

    Both arguments are normalized before they are compared. If they normalize
    to the same path, "." is returned.

    Args:
      path: The path to relativize.
      start: The ancestor path against which to relativize.

    Returns:
      The portion of `path` that is relative to `start`, or "." when the two
      are the same path.
    """
    segments = _normalize(path).split("/")
    start_segments = _normalize(start).split("/")
    if start_segments == ["."]:
        # An empty or "." start is the current directory: nothing to strip.
        start_segments = []
    start_length = len(start_segments)

    # NOTE(review): a `start` of "/" normalizes to "/", which splits into
    # ["", ""]; the segment comparison below therefore rejects paths such as
    # "/foo" relative to "/". Left as-is here; confirm whether root starts
    # should be supported.
    if (path.startswith("/") != start.startswith("/") or
        len(segments) < start_length):
        fail("Path '%s' is not beneath '%s'" % (path, start))

    for ancestor_segment, segment in zip(start_segments, segments):
        if ancestor_segment != segment:
            fail("Path '%s' is not beneath '%s'" % (path, start))

    # Slice from the front rather than with a negative count from the end: when
    # `path` equals `start` the count is 0, and `segments[-0:]` would be the
    # whole list instead of an empty one.
    return "/".join(segments[start_length:]) or "."
252
def _replace_extension(p, new_extension):
    """Replaces the extension of the file at the end of a path.

    A path without an extension simply has the new extension appended.

    Args:
      p: The path whose extension should be replaced.
      new_extension: The new extension for the file. The new extension should
          begin with a dot if you want the new filename to have one.

    Returns:
      The path with the extension replaced (or added, if it did not have one).
    """
    root, _ = _split_extension(p)
    return root + new_extension
267
def _split_extension(p):
    """Splits the path `p` into a tuple containing the root and extension.

    A period at the very start of the basename does not begin an extension, so
    `path.split_extension(".bashrc")` returns `(".bashrc", "")`.

    Args:
      p: The path whose root and extension should be split.

    Returns:
      A tuple `(root, ext)` such that the root is the path without the file
      extension, and `ext` is the file extension (which, if non-empty, contains
      the leading dot). The returned tuple always satisfies the relationship
      `root + ext == p`.
    """
    name = _basename(p)
    dot = name.rfind(".")

    # -1 means the basename has no dot; 0 means its only candidate dot is the
    # leading one. Either way there is no extension.
    if dot <= 0:
        return (p, "")

    # The basename is a suffix of `p`, so the extension occupies the same
    # number of trailing characters in both.
    cut = len(p) - (len(name) - dot)
    return (p[:cut], p[cut:])
293
def _starts_with(path_a, path_b):
    """Returns True if and only if path_b is equal to or an ancestor of path_a.

    Both paths are normalized first, and the comparison is made on whole path
    segments: "a/bc" does not start with "a/b". An empty `path_b` matches every
    path.

    Does not handle OS dependent case-insensitivity.

    Args:
      path_a: The path to test.
      path_b: The candidate leading path.

    Returns:
      True if `path_a` starts with `path_b`, False otherwise.
    """
    if not path_b:
        # all paths start with the empty string
        return True
    norm_a = _normalize(path_a)
    norm_b = _normalize(path_b)
    if not norm_a.startswith(norm_b):
        # Also covers the case where norm_b is longer than norm_a.
        return False
    if len(norm_a) == len(norm_b):
        return True

    # The prefix must stop at a segment boundary. A normalized path ends in a
    # slash only when it is a root ("/" or "//"); in that case the boundary is
    # the prefix's own last character, so "/foo" does start with "/".
    # Otherwise the next character of norm_a has to be the separator.
    return norm_b.endswith("/") or norm_a[len(norm_b)] == "/"
308
# Bundles the functions defined in this file into a single `paths` struct, so
# that each private `_name` function is reachable as `paths.name`.
paths = struct(
    basename = _basename,
    dirname = _dirname,
    is_absolute = _is_absolute,
    join = _join,
    normalize = _normalize,
    is_normalized = _is_normalized,
    relativize = _relativize,
    replace_extension = _replace_extension,
    split_extension = _split_extension,
    starts_with = _starts_with,
)
321