# Copyright 2017 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Skylib module containing file path manipulation functions.

NOTE: The functions in this module currently only support paths with Unix-style
path separators (forward slash, "/"); they do not handle Windows-style paths
with backslash separators or drive letters.
"""

def _basename(p):
    """Returns the basename (i.e., the file portion) of a path.

    Note that if `p` ends with a slash, this function returns an empty string.
    This matches the behavior of Python's `os.path.basename`, but differs from
    the Unix `basename` command (which would return the path segment preceding
    the final slash).

    Args:
      p: The path whose basename should be returned.

    Returns:
      The basename of the path, which includes the extension.
    """
    return p.rpartition("/")[-1]

def _dirname(p):
    """Returns the dirname of a path.

    The dirname is the portion of `p` up to but not including the file portion
    (i.e., the basename). Any slashes immediately preceding the basename are not
    included, unless omitting them would make the dirname empty.

    Args:
      p: The path whose dirname should be returned.

    Returns:
      The dirname of the path.
    """
    prefix, sep, _ = p.rpartition("/")
    if not prefix:
        # Either there is no slash at all (sep == "") or the only slash is the
        # leading one (sep == "/"); in both cases `sep` is the right answer.
        return sep
    else:
        # If there are multiple consecutive slashes, strip them all out as Python's
        # os.path.dirname does.
        return prefix.rstrip("/")

def _is_absolute(path):
    """Returns `True` if `path` is an absolute path.

    A path is considered absolute if it starts with a slash, or if it is longer
    than two characters and its second character is a colon (e.g. "C:/foo").

    Args:
      path: A path (which is a string).

    Returns:
      `True` if `path` is an absolute path.
    """
    return path.startswith("/") or (len(path) > 2 and path[1] == ":")

def _join(path, *others):
    """Joins one or more path components intelligently.

    This function mimics the behavior of Python's `os.path.join` function on POSIX
    platform. It returns the concatenation of `path` and any members of `others`,
    inserting directory separators before each component except the first. The
    separator is not inserted if the path up until that point is either empty or
    already ends in a separator.

    If any component is an absolute path, all previous components are discarded.

    Args:
      path: A path segment.
      *others: Additional path segments.

    Returns:
      A string containing the joined paths.
    """
    result = path

    for p in others:
        if _is_absolute(p):
            result = p
        elif not result or result.endswith("/"):
            result += p
        else:
            result += "/" + p

    return result

def _normalize(path):
    """Normalizes a path, eliminating double slashes and other redundant segments.

    This function mimics the behavior of Python's `os.path.normpath` function on
    POSIX platforms; specifically:

    - If the entire path is empty, "." is returned.
    - All "." segments are removed, unless the path consists solely of a single
      "." segment.
    - Trailing slashes are removed, unless the path consists solely of slashes.
    - ".." segments are removed as long as there are corresponding segments
      earlier in the path to remove; otherwise, they are retained as leading ".."
      segments.
    - Single and double leading slashes are preserved, but three or more leading
      slashes are collapsed into a single leading slash.
    - Multiple adjacent internal slashes are collapsed into a single slash.

    Args:
      path: A path.

    Returns:
      The normalized path.
    """
    if not path:
        return "."

    if path.startswith("//") and not path.startswith("///"):
        initial_slashes = 2
    elif path.startswith("/"):
        initial_slashes = 1
    else:
        initial_slashes = 0
    is_relative = (initial_slashes == 0)

    components = path.split("/")
    new_components = []

    for component in components:
        if component in ("", "."):
            continue
        if component == "..":
            if new_components and new_components[-1] != "..":
                # Only pop the last segment if it isn't another "..".
                new_components.pop()
            elif is_relative:
                # Preserve leading ".." segments for relative paths.
                new_components.append(component)
        else:
            new_components.append(component)

    path = "/".join(new_components)
    if not is_relative:
        path = ("/" * initial_slashes) + path

    return path or "."

# States of the scanner used by `_is_normalized`.
_BASE = 0  # Inside a regular segment.
_SEPARATOR = 1  # At the start of the path or just after a "/".
_DOT = 2  # The current segment so far is exactly ".".
_DOTDOT = 3  # The current segment so far is exactly "..".

def _is_normalized(str, look_for_same_level_references = True):
    """Returns true if the passed path doesn't contain uplevel references "..".

    Also checks for single-dot references "." if look_for_same_level_references
    is `True`.

    Args:
      str: The path string to check.
      look_for_same_level_references: If True checks if path doesn't contain
        uplevel references ".." or single-dot references ".".

    Returns:
      True if the path is normalized, False otherwise.
    """

    # The start of the path behaves like the position right after a separator.
    state = _SEPARATOR
    for c in str.elems():
        is_separator = (c == "/")

        if state == _BASE:
            if is_separator:
                state = _SEPARATOR
        elif state == _SEPARATOR:
            if is_separator:
                state = _SEPARATOR
            elif c == ".":
                state = _DOT
            else:
                state = _BASE
        elif state == _DOT:
            if is_separator:
                if look_for_same_level_references:
                    # "." segment found.
                    return False
                state = _SEPARATOR
            elif c == ".":
                state = _DOTDOT
            else:
                state = _BASE
        elif state == _DOTDOT:
            if is_separator:
                # ".." segment found.
                return False
            else:
                state = _BASE

    # The path may end in the middle of a "." or ".." segment.
    if state == _DOT:
        if look_for_same_level_references:
            # "." segment found.
            return False
    elif state == _DOTDOT:
        return False
    return True

def _relativize(path, start):
    """Returns the portion of `path` that is relative to `start`.

    Because we do not have access to the underlying file system, this
    implementation differs slightly from Python's `os.path.relpath` in that it
    will fail if `path` is not beneath `start` (rather than use parent segments to
    walk up to the common file system root).

    Relativizing paths that start with parent directory references only works if
    both paths start with the same initial parent references.

    Args:
      path: The path to relativize.
      start: The ancestor path against which to relativize.

    Returns:
      The portion of `path` that is relative to `start`. If `path` normalizes to
      exactly `start`, the result is the empty string (or "." when both
      normalize to the current directory).
    """
    normalized_path = _normalize(path)
    normalized_start = _normalize(start)

    if path.startswith("/") != start.startswith("/"):
        fail("Path '%s' is not beneath '%s'" % (path, start))

    # A normalized path only ends with a slash when it is a root ("/" or "//").
    # Splitting a root on "/" yields a spurious trailing empty segment that would
    # never match the first real segment of `path`, so handle roots directly.
    if normalized_start.endswith("/"):
        if not normalized_path.startswith(normalized_start):
            fail("Path '%s' is not beneath '%s'" % (path, start))
        return normalized_path.lstrip("/")

    segments = normalized_path.split("/")
    start_segments = normalized_start.split("/")
    if start_segments == ["."]:
        start_segments = []
    start_length = len(start_segments)

    if len(segments) < start_length:
        fail("Path '%s' is not beneath '%s'" % (path, start))

    for ancestor_segment, segment in zip(start_segments, segments):
        if ancestor_segment != segment:
            fail("Path '%s' is not beneath '%s'" % (path, start))

    # Slice from the front rather than with a negative index: when `path` equals
    # `start` the remaining length is 0, and `segments[-0:]` would be the whole
    # list instead of an empty one.
    return "/".join(segments[start_length:])

def _replace_extension(p, new_extension):
    """Replaces the extension of the file at the end of a path.

    If the path has no extension, the new extension is added to it.

    Args:
      p: The path whose extension should be replaced.
      new_extension: The new extension for the file. The new extension should
        begin with a dot if you want the new filename to have one.

    Returns:
      The path with the extension replaced (or added, if it did not have one).
    """
    return _split_extension(p)[0] + new_extension

def _split_extension(p):
    """Splits the path `p` into a tuple containing the root and extension.

    Leading periods on the basename are ignored, so
    `path.split_extension(".bashrc")` returns `(".bashrc", "")`.

    Args:
      p: The path whose root and extension should be split.

    Returns:
      A tuple `(root, ext)` such that the root is the path without the file
      extension, and `ext` is the file extension (which, if non-empty, contains
      the leading dot). The returned tuple always satisfies the relationship
      `root + ext == p`.
    """
    b = _basename(p)
    last_dot_in_basename = b.rfind(".")

    # If there is no dot or the only dot in the basename is at the front, then
    # there is no extension.
    if last_dot_in_basename <= 0:
        return (p, "")

    dot_distance_from_end = len(b) - last_dot_in_basename
    return (p[:-dot_distance_from_end], p[-dot_distance_from_end:])

def _starts_with(path_a, path_b):
    """Returns True if and only if path_b is an ancestor of path_a.

    Both paths are normalized before being compared, and a path is considered
    to start with itself. Does not handle OS dependent case-insensitivity.

    Args:
      path_a: The path that is checked for being beneath `path_b`.
      path_b: The candidate ancestor path.

    Returns:
      True if `path_b` is empty, or if the normalized `path_a` is equal to or
      beneath the normalized `path_b`; False otherwise.
    """
    if not path_b:
        # all paths start with the empty string
        return True
    norm_a = _normalize(path_a)
    norm_b = _normalize(path_b)
    if len(norm_b) > len(norm_a):
        return False
    if not norm_a.startswith(norm_b):
        return False
    if len(norm_a) == len(norm_b):
        return True

    # The prefix must end on a segment boundary. A root prefix ("/" or "//")
    # already ends with the separator; otherwise the next character of `norm_a`
    # has to be one (so "foo/bar" does not start with "fo").
    return norm_b.endswith("/") or norm_a[len(norm_b)] == "/"

paths = struct(
    basename = _basename,
    dirname = _dirname,
    is_absolute = _is_absolute,
    join = _join,
    normalize = _normalize,
    is_normalized = _is_normalized,
    relativize = _relativize,
    replace_extension = _replace_extension,
    split_extension = _split_extension,
    starts_with = _starts_with,
)