# Copyright 2017 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Skylib module containing file path manipulation functions.

NOTE: The functions in this module currently only support paths with Unix-style
path separators (forward slash, "/"); they do not handle Windows-style paths
with backslash separators or drive letters. The one exception is `is_absolute`,
which additionally treats a path whose second character is ":" as absolute.
"""

# This file is in the Bazel build language dialect of Starlark,
# so declarations of 'fail' and 'struct' are required to make
# it compile in the core language.
def fail(msg):
    """Stand-in for Bazel's `fail`: prints `msg` and returns normally.

    Unlike the Bazel builtin, this shim does not abort evaluation, so callers
    in this file keep executing after reporting a failure.

    Args:
      msg: The failure message to print.
    """
    print(msg)

# Stand-in for Bazel's `struct`: `paths` below is therefore a plain dict keyed
# by function name.
struct = dict

def _basename(p):
    """Returns the basename (i.e., the file portion) of a path.

    Note that if `p` ends with a slash, this function returns an empty string.
    This matches the behavior of Python's `os.path.basename`, but differs from
    the Unix `basename` command (which would return the path segment preceding
    the final slash).

    Args:
      p: The path whose basename should be returned.

    Returns:
      The basename of the path, which includes the extension.
    """
    return p.rpartition("/")[-1]

def _dirname(p):
    """Returns the dirname of a path.

    The dirname is the portion of `p` up to but not including the file portion
    (i.e., the basename). Any slashes immediately preceding the basename are not
    included, unless omitting them would make the dirname empty.

    Args:
      p: The path whose dirname should be returned.

    Returns:
      The dirname of the path.
    """
    prefix, sep, _ = p.rpartition("/")
    if not prefix:
        # Either there was no slash at all (sep is "") or the only slash is the
        # leading one (sep is "/"); in both cases sep is the right answer.
        return sep

    # If there are multiple consecutive slashes, strip them all out as Python's
    # os.path.dirname does.
    return prefix.rstrip("/")

def _is_absolute(path):
    """Returns `True` if `path` is an absolute path.

    A path is considered absolute if it starts with a slash, or if it is longer
    than two characters and its second character is a colon (e.g., "C:/foo").

    Args:
      path: A path (which is a string).

    Returns:
      `True` if `path` is an absolute path.
    """
    return path.startswith("/") or (len(path) > 2 and path[1] == ":")

def _join(path, *others):
    """Joins one or more path components intelligently.

    This function mimics the behavior of Python's `os.path.join` function on
    POSIX platforms. It returns the concatenation of `path` and any members of
    `others`, inserting directory separators before each component except the
    first. The separator is not inserted if the path up until that point is
    either empty or already ends in a separator.

    If any component is an absolute path, all previous components are discarded.

    Args:
      path: A path segment.
      *others: Additional path segments.

    Returns:
      A string containing the joined paths.
    """
    result = path

    for p in others:
        if _is_absolute(p):
            # An absolute component restarts the path from scratch.
            result = p
        elif not result or result.endswith("/"):
            result += p
        else:
            result += "/" + p

    return result

def _normalize(path):
    """Normalizes a path, eliminating double slashes and other redundant segments.

    This function mimics the behavior of Python's `os.path.normpath` function on
    POSIX platforms; specifically:

    - If the entire path is empty, "." is returned.
    - All "." segments are removed, unless the path consists solely of a single
      "." segment.
    - Trailing slashes are removed, unless the path consists solely of slashes.
    - ".." segments are removed as long as there are corresponding segments
      earlier in the path to remove; otherwise, they are retained as leading ".."
      segments.
    - Single and double leading slashes are preserved, but three or more leading
      slashes are collapsed into a single leading slash.
    - Multiple adjacent internal slashes are collapsed into a single slash.

    Args:
      path: A path.

    Returns:
      The normalized path.
    """
    if not path:
        return "."

    if path.startswith("//") and not path.startswith("///"):
        initial_slashes = 2
    elif path.startswith("/"):
        initial_slashes = 1
    else:
        initial_slashes = 0
    is_relative = (initial_slashes == 0)

    components = path.split("/")
    new_components = []

    for component in components:
        if component in ("", "."):
            continue
        if component == "..":
            if new_components and new_components[-1] != "..":
                # Only pop the last segment if it isn't another "..".
                new_components.pop()
            elif is_relative:
                # Preserve leading ".." segments for relative paths.
                new_components.append(component)

            # For absolute paths a ".." at the root is silently dropped.
        else:
            new_components.append(component)

    path = "/".join(new_components)
    if not is_relative:
        path = ("/" * initial_slashes) + path

    return path or "."

def _relativize(path, start):
    """Returns the portion of `path` that is relative to `start`.

    Because we do not have access to the underlying file system, this
    implementation differs slightly from Python's `os.path.relpath` in that it
    will fail if `path` is not beneath `start` (rather than use parent segments to
    walk up to the common file system root).

    Relativizing paths that start with parent directory references only works if
    both paths start with the same initial parent references.

    If `path` and `start` normalize to the same path, the empty string is
    returned.

    Args:
      path: The path to relativize.
      start: The ancestor path against which to relativize.

    Returns:
      The portion of `path` that is relative to `start`.
    """
    segments = _normalize(path).split("/")
    start_segments = _normalize(start).split("/")
    if start_segments == ["."]:
        start_segments = []
    elif len(start_segments) > 1 and start_segments[-1] == "":
        # A normalized path only ends in a slash when it consists solely of
        # slashes ("/" or "//"). Splitting it leaves a trailing empty segment
        # that would never match the first real segment of `path`, so drop it;
        # otherwise nothing would be considered beneath the root.
        start_segments = start_segments[:-1]
    start_length = len(start_segments)

    if (path.startswith("/") != start.startswith("/") or
        len(segments) < start_length):
        fail("Path '%s' is not beneath '%s'" % (path, start))

    for ancestor_segment, segment in zip(start_segments, segments):
        if ancestor_segment != segment:
            fail("Path '%s' is not beneath '%s'" % (path, start))

    # Slice from the front rather than with a negative index: when `path` and
    # `start` have the same number of segments, `segments[-0:]` would be the
    # whole list instead of an empty one.
    return "/".join(segments[start_length:])

def _replace_extension(p, new_extension):
    """Replaces the extension of the file at the end of a path.

    If the path has no extension, the new extension is added to it.

    Args:
      p: The path whose extension should be replaced.
      new_extension: The new extension for the file. The new extension should
          begin with a dot if you want the new filename to have one.

    Returns:
      The path with the extension replaced (or added, if it did not have one).
    """
    return _split_extension(p)[0] + new_extension

def _split_extension(p):
    """Splits the path `p` into a tuple containing the root and extension.

    Leading periods on the basename are ignored, so
    `path.split_extension(".bashrc")` returns `(".bashrc", "")`.

    Args:
      p: The path whose root and extension should be split.

    Returns:
      A tuple `(root, ext)` such that the root is the path without the file
      extension, and `ext` is the file extension (which, if non-empty, contains
      the leading dot). The returned tuple always satisfies the relationship
      `root + ext == p`.
    """
    b = _basename(p)
    last_dot_in_basename = b.rfind(".")

    # If there is no dot or the only dot in the basename is at the front, then
    # there is no extension.
    if last_dot_in_basename <= 0:
        return (p, "")

    # The basename is a suffix of `p`, so the distance of the dot from the end
    # of the basename is also its distance from the end of `p`.
    dot_distance_from_end = len(b) - last_dot_in_basename
    return (p[:-dot_distance_from_end], p[-dot_distance_from_end:])

paths = struct(
    basename = _basename,
    dirname = _dirname,
    is_absolute = _is_absolute,
    join = _join,
    normalize = _normalize,
    relativize = _relativize,
    replace_extension = _replace_extension,
    split_extension = _split_extension,
)