1"""Common operations on Posix pathnames.
2
3Instead of importing this module directly, import os and refer to
4this module as os.path.  The "os.path" name is an alias for this
5module on Posix systems; on other systems (e.g. Windows),
6os.path provides the same operations in a manner specific to that
7platform, and is an alias to another module (e.g. ntpath).
8
9Some of this can actually be useful on non-Posix systems too, e.g.
10for manipulation of the pathname component of URLs.
11"""
12
13# Strings representing various path-related bits and pieces.
14# These are primarily for export; internally, they are hardcoded.
15# Should be set before imports for resolving cyclic dependency.
curdir = '.'                # name referring to the current directory
pardir = '..'               # name referring to the parent directory
extsep = '.'                # separates a base name from its extension
sep = '/'                   # separator between pathname components
pathsep = ':'               # separator between entries of a search path
defpath = '/bin:/usr/bin'   # default search path, entries joined by pathsep
altsep = None               # no alternative component separator is defined here
devnull = '/dev/null'       # path of the null device
24
25import os
26import sys
27import stat
28import genericpath
29from genericpath import *
30
# Names bound by a star-import of this module.  Several of them
# (e.g. commonprefix, getsize, exists, isdir, isfile, samefile) are not
# defined in this file; they come from "from genericpath import *" above.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath",
           "commonpath"]
39
40
41def _get_sep(path):
42    if isinstance(path, bytes):
43        return b'/'
44    else:
45        return '/'
46
47# Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
48# On MS-DOS this may also turn slashes into backslashes; however, other
49# normalizations (such as optimizing '../' away) are not allowed
50# (another function should be defined to do that).
51
def normcase(s):
    """Return the os.fspath() form of *s*; case is left untouched on Posix."""
    unchanged = os.fspath(s)
    return unchanged
55
56
57# Return whether a path is absolute.
58# Trivial in Posix, harder on the Mac or MS-DOS.
59
def isabs(s):
    """Return True when path *s* begins with a slash, i.e. is absolute."""
    path = os.fspath(s)
    return path.startswith(_get_sep(path))
65
66
67# Join pathnames.
68# Ignore the previous parts if a part is absolute.
69# Insert a '/' unless the first part is empty or already ends in '/'.
70
def join(a, *p):
    """Concatenate *a* and the components in *p*, adding '/' where needed.

    An absolute component throws away everything joined before it.  When
    the last component is empty the result ends with a separator.
    """
    a = os.fspath(a)
    slash = _get_sep(a)
    result = a
    try:
        if not p:
            # bpo-23780: with nothing to join, still provoke a TypeError
            # for an argument that is neither str nor bytes.
            result[:0] + slash
        for part in map(os.fspath, p):
            if part.startswith(slash):
                # Absolute component: restart from it.
                result = part
            elif result and not result.endswith(slash):
                result += slash + part
            else:
                # Nothing joined yet, or a separator is already in place.
                result += part
    except (TypeError, AttributeError, BytesWarning):
        # Let the shared checker raise a clearer error for mixed or
        # unsupported argument types; otherwise re-raise the original.
        genericpath._check_arg_types('join', a, *p)
        raise
    return result
93
94
# Split a path into head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.
99
def split(p):
    """Break a pathname into the pair "(head, tail)".

    "tail" is whatever follows the last slash and "head" is everything
    before it; either one can be empty."""
    p = os.fspath(p)
    slash = _get_sep(p)
    cut = p.rfind(slash) + 1
    head = p[:cut]
    tail = p[cut:]
    # Drop trailing slashes from head, except when head consists of
    # slashes only (stripping would then leave nothing, so keep it).
    head = head.rstrip(slash) or head
    return head, tail
110
111
112# Split a path in root and extension.
113# The extension is everything starting at the last dot in the last
114# pathname component; the root is everything before that.
115# It is always true that root + ext == p.
116
def splitext(p):
    # No docstring here on purpose: it is copied from
    # genericpath._splitext right after the definition.
    p = os.fspath(p)
    sep, extsep = (b'/', b'.') if isinstance(p, bytes) else ('/', '.')
    # The third argument is None because no alternative separator is used.
    return genericpath._splitext(p, sep, None, extsep)
splitext.__doc__ = genericpath._splitext.__doc__
127
128# Split a pathname into a drive specification and the rest of the
129# path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
130
def splitdrive(p):
    """Return the pair (drive, path) for a pathname.

    The drive part is always an empty string of the same type as the path."""
    whole = os.fspath(p)
    no_drive = whole[:0]
    return no_drive, whole
136
137
138# Return the tail (basename) part of a path, same as split(path)[1].
139
def basename(p):
    """Return whatever follows the last slash of a pathname"""
    p = os.fspath(p)
    last_slash = p.rfind(_get_sep(p))
    # rfind() gives -1 when there is no slash, so the slice starts at 0.
    return p[last_slash + 1:]
146
147
148# Return the head (dirname) part of a path, same as split(path)[0].
149
def dirname(p):
    """Return everything before the final component of a pathname"""
    p = os.fspath(p)
    slash = _get_sep(p)
    head = p[:p.rfind(slash) + 1]
    # Drop trailing slashes, except when head consists of slashes only
    # (stripping would then leave nothing, so keep it as is).
    return head.rstrip(slash) or head
159
160
161# Is a path a symbolic link?
162# This will always return false on systems where os.lstat doesn't exist.
163
def islink(path):
    """Return whether *path* is a symbolic link.

    Any OSError, ValueError or AttributeError raised by os.lstat() is
    reported as False."""
    try:
        info = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
171
172# Being true for dangling symbolic links is also useful.
173
def lexists(path):
    """Return whether os.lstat() succeeds on *path*.

    Because lstat does not follow links, a broken symbolic link counts as
    existing.  OSError and ValueError are reported as False."""
    try:
        os.lstat(path)
    except (OSError, ValueError):
        found = False
    else:
        found = True
    return found
181
182
183# Is a path a mount point?
184# (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
185
def ismount(path):
    """Return whether *path* is a mount point.

    A path that cannot be lstat'ed, or that is a symbolic link, is never
    reported as a mount point."""
    try:
        st_path = os.lstat(path)
    except (OSError, ValueError):
        # Nothing there, so it cannot be a mount point.
        return False
    if stat.S_ISLNK(st_path.st_mode):
        # Symbolic links are never mount points.
        return False

    path = os.fspath(path)
    dotdot = b'..' if isinstance(path, bytes) else '..'
    parent = realpath(join(path, dotdot))
    try:
        st_parent = os.lstat(parent)
    except (OSError, ValueError):
        return False

    if st_path.st_dev != st_parent.st_dev:
        # path/.. lives on a different device than path.
        return True
    # Same device: a mount point only if path/.. is the very same i-node.
    return st_path.st_ino == st_parent.st_ino
218
219
220# Expand paths beginning with '~' or '~user'.
221# '~' means $HOME; '~user' means that user's home directory.
222# If the path doesn't begin with '~', or if the user or $HOME is unknown,
223# the path is returned unchanged (leaving error reporting to whatever
224# function is called with the expanded path as argument).
225# See also module 'glob' for expansion of *, ? and [...] in pathnames.
226# (A function should also be defined to do full *sh-style environment
227# variable expansion.)
228
def expanduser(path):
    """Expand an initial ~ or ~user component into a home directory.

    *path* may be str, bytes or a path-like object; the result has the
    same string type as os.fspath(path).  A bare "~" is replaced by
    $HOME when that variable is set, otherwise by the password-database
    entry of the current user.  "~user" is replaced by that user's home
    directory from the password database.

    The path is returned unchanged when it does not start with "~", when
    the pwd module is unavailable, or when the user is unknown.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        tilde = b'~'
        sep = b'/'
    else:
        tilde = '~'
        sep = '/'
    if not path.startswith(tilde):
        return path
    # i ends up as the index just past the "~" / "~user" component.
    i = path.find(sep, 1)
    if i < 0:
        i = len(path)
    if i == 1:
        # Bare "~": prefer $HOME, fall back to the password database.
        if 'HOME' not in os.environ:
            try:
                import pwd
            except ImportError:
                # pwd module unavailable, return path unchanged
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # bpo-10496: if the current user identifier doesn't exist in the
                # password database, return the path unchanged
                return path
        else:
            userhome = os.environ['HOME']
    else:
        try:
            import pwd
        except ImportError:
            # pwd module unavailable, return path unchanged
            return path
        name = path[1:i]
        if isinstance(name, bytes):
            # Decode with the filesystem encoding and error handler instead
            # of strict ASCII, so that a non-ASCII user name is looked up
            # (and reported as unknown) rather than raising
            # UnicodeDecodeError.
            name = os.fsdecode(name)
        try:
            pwent = pwd.getpwnam(name)
        except KeyError:
            # bpo-10496: if the user name from the path doesn't exist in the
            # password database, return the path unchanged
            return path
        userhome = pwent.pw_dir
    # if no user home, return the path unchanged on VxWorks
    if userhome is None and sys.platform == "vxworks":
        return path
    if isinstance(path, bytes):
        userhome = os.fsencode(userhome)
    # Strip trailing slashes so "~/x" never expands to "home//x"; a home
    # directory of "/" with nothing after it falls back to the root.
    userhome = userhome.rstrip(sep)
    return (userhome + path[i:]) or sep
284
285
286# Expand paths containing shell variable substitutions.
287# This expands the forms $variable and ${variable} only.
288# Non-existent variables are left unchanged.
289
# Lazily compiled patterns for str and bytes paths respectively; they are
# filled in by expandvars() the first time each kind of path is seen.
_varprog = None
_varprogb = None

def expandvars(path):
    """Substitute $var and ${var} references with environment values.

    References to variables that are not set are kept as written."""
    path = os.fspath(path)
    global _varprog, _varprogb
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprogb.search
        lbrace, rbrace = b'{', b'}'
        # None when os has no bytes environment; handled in the loop below.
        environ = getattr(os, 'environb', None)
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        find = _varprog.search
        lbrace, rbrace = '{', '}'
        environ = os.environ
    pos = 0
    match = find(path, pos)
    while match:
        begin, stop = match.span(0)
        name = match.group(1)
        if name.startswith(lbrace) and name.endswith(rbrace):
            name = name[1:-1]
        try:
            if environ is None:
                value = os.fsencode(os.environ[os.fsdecode(name)])
            else:
                value = environ[name]
        except KeyError:
            # Unknown variable: keep the reference and search after it.
            pos = stop
        else:
            # Splice the value in and resume right after it, so the
            # inserted text itself is never scanned for references.
            expanded = path[:begin] + value
            pos = len(expanded)
            path = expanded + path[stop:]
        match = find(path, pos)
    return path
340
341
342# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
343# It should be understood that this may change the meaning of the path
344# if it contains symbolic links!
345
try:
    from posix import _path_normpath

except ImportError:
    # No accelerated helper available: normalize in pure Python.
    def normpath(path):
        """Collapse repeated slashes and '.'/'..' components of a path."""
        path = os.fspath(path)
        if isinstance(path, bytes):
            sep, empty, dot, dotdot = b'/', b'', b'.', b'..'
        else:
            sep, empty, dot, dotdot = '/', '', '.', '..'
        if path == empty:
            return dot
        # POSIX allows one or two initial slashes, but treats three or more
        # as single slash.
        # (see https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)
        if path.startswith(sep * 2) and not path.startswith(sep * 3):
            leading = 2
        elif path.startswith(sep):
            leading = 1
        else:
            leading = 0
        kept = []
        for comp in path.split(sep):
            if comp == empty or comp == dot:
                continue
            if comp == dotdot and kept and kept[-1] != dotdot:
                # ".." cancels the preceding ordinary component.
                kept.pop()
            elif comp == dotdot and leading and not kept:
                # ".." directly under the root is dropped.
                continue
            else:
                kept.append(comp)
        path = sep.join(kept)
        if leading:
            path = sep * leading + path
        return path or dot

else:
    # Accelerated helper available: it works on str, so bytes paths are
    # decoded for the call and encoded again afterwards.
    def normpath(path):
        """Collapse repeated slashes and '.'/'..' components of a path."""
        path = os.fspath(path)
        if not isinstance(path, bytes):
            return _path_normpath(path) or "."
        decoded = os.fsdecode(path)
        return os.fsencode(_path_normpath(decoded)) or b"."
395
396
def abspath(path):
    """Return the normalized, absolute form of *path*.

    A relative path is joined onto the current working directory first."""
    path = os.fspath(path)
    if isabs(path):
        return normpath(path)
    # Fetch the working directory in the same string type as the path.
    cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
    return normpath(join(cwd, path))
407
408
409# Return a canonical path (i.e. the absolute location of a file on the
410# filesystem).
411
def realpath(filename, *, strict=False):
    """Return the canonical form of *filename* with every symbolic link
found along the way resolved.  *strict* is handed on unchanged to the
link-resolving helper."""
    filename = os.fspath(filename)
    # Start from an empty prefix of the same string type as filename; the
    # helper's second result (its success flag) is not needed here.
    resolved, _ = _joinrealpath(filename[:0], filename, strict, {})
    return abspath(resolved)
418
419# Join two paths, normalizing and eliminating any symbolic links
420# encountered in the second path.
def _joinrealpath(path, rest, strict, seen):
    """Resolve *rest* component by component on top of the prefix *path*.

    path   -- prefix resolved so far (str or bytes; its type selects the
              separator and dot literals used below).
    rest   -- remaining path to process; when it is absolute, *path* is
              replaced by the root before processing starts.
    strict -- when true, an OSError from os.lstat() is propagated; when
              false, a component that cannot be lstat'ed is treated as
              not being a symbolic link.
    seen   -- dict shared with the recursive calls, mapping the path of
              each symbolic link met so far to its resolved target, or
              to None while that link is still being resolved.

    Returns a pair (path, ok).  ok is False only when a symbolic link
    loop was detected in non-strict mode; the returned path is then the
    part processed so far joined with the unprocessed remainder.
    """
    if isinstance(path, bytes):
        sep = b'/'
        curdir = b'.'
        pardir = b'..'
    else:
        sep = '/'
        curdir = '.'
        pardir = '..'

    if isabs(rest):
        # Absolute remainder: drop its leading slash and restart at the root.
        rest = rest[1:]
        path = sep

    while rest:
        # Peel off the next component; rest keeps whatever follows it.
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            if path:
                path, name = split(path)
                if name == pardir:
                    # The prefix already ended in '..', so it cannot be
                    # shortened: put that '..' back and add another one.
                    path = join(path, pardir, pardir)
            else:
                path = pardir
            continue
        newpath = join(path, name)
        try:
            st = os.lstat(newpath)
        except OSError:
            if strict:
                raise
            is_link = False
        else:
            is_link = stat.S_ISLNK(st.st_mode)
        if not is_link:
            path = newpath
            continue
        # Resolve the symbolic link
        if newpath in seen:
            # Already seen this path
            path = seen[newpath]
            if path is not None:
                # use cached value
                continue
            # The symlink is not resolved, so we must have a symlink loop.
            if strict:
                # Raise OSError(errno.ELOOP)
                os.stat(newpath)
            else:
                # Return already resolved part + rest of the path unchanged.
                return join(newpath, rest), False
        seen[newpath] = None # not resolved symlink
        # Resolve the link target relative to the prefix that held the link.
        path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
        if not ok:
            # A loop was found further down: give up and pass it upwards.
            return join(path, rest), False
        seen[newpath] = path # resolved symlink

    return path, True
482
483
# Exported flag; true only when sys.platform is 'darwin'.
supports_unicode_filenames = (sys.platform == 'darwin')
485
def relpath(path, start=None):
    """Return a relative version of *path*, computed from *start*.

    *path* and *start* may be str, bytes or path-like objects.  *start*
    defaults to the current directory.  Both are made absolute with
    abspath() before being compared, and the result climbs out of
    *start* with '..' components as far as needed.

    Raises ValueError("no path specified") when *path* is empty.
    """

    # A falsy non-path argument (e.g. None) keeps raising ValueError as
    # before.  Anything else is converted first, so that a path-like
    # object wrapping an empty path is rejected as well instead of being
    # treated like the current directory.
    if path:
        path = os.fspath(path)
    if not path:
        raise ValueError("no path specified")

    if isinstance(path, bytes):
        curdir = b'.'
        sep = b'/'
        pardir = b'..'
    else:
        curdir = '.'
        sep = '/'
        pardir = '..'

    if start is None:
        start = curdir
    else:
        start = os.fspath(start)

    try:
        start_list = [x for x in abspath(start).split(sep) if x]
        path_list = [x for x in abspath(path).split(sep) if x]
        # Work out how much of the filepath is shared by start and path.
        i = len(commonprefix([start_list, path_list]))

        # One '..' per component of start that is not shared, followed by
        # the components of path that are not shared.
        rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
        if not rel_list:
            return curdir
        return join(*rel_list)
    except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
        # Let the shared checker raise a clearer error for mixed or
        # unsupported argument types; otherwise re-raise the original.
        genericpath._check_arg_types('relpath', path, start)
        raise
520
521
522# Return the longest common sub-path of the sequence of paths given as input.
523# The paths are not normalized before comparing them (this is the
524# responsibility of the caller). Any trailing separator is stripped from the
525# returned path.
526
def commonpath(paths):
    """Given an iterable of path names, return the longest common sub-path.

    *paths* may be any iterable of str, bytes or path-like objects,
    including a one-shot iterator.  The paths are compared component by
    component; empty and '.' components are ignored, and the result has
    no trailing separator.

    Raises ValueError when *paths* is empty or mixes absolute and
    relative paths.
    """

    # Materialize before testing for emptiness: an iterator is always
    # truthy, so an exhausted one would otherwise get past the check and
    # fail later with an IndexError.  A falsy argument is not converted
    # and is rejected as empty.
    if paths:
        paths = tuple(map(os.fspath, paths))
    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep = b'/'
        curdir = b'.'
    else:
        sep = '/'
        curdir = '.'

    try:
        split_paths = [path.split(sep) for path in paths]

        try:
            # Unpacking a one-element set succeeds only when every path
            # agrees on being absolute (or on being relative).
            absolute, = {p[:1] == sep for p in paths}
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths") from None

        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        # The lexicographically smallest and largest component lists
        # differ at least as early as any other pair, so comparing these
        # two is enough.
        s1 = min(split_paths)
        s2 = max(split_paths)
        common = s1
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = s1[:i]
                break

        prefix = sep if absolute else sep[:0]
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        # Let the shared checker raise a clearer error for mixed or
        # unsupported argument types; otherwise re-raise the original.
        genericpath._check_arg_types('commonpath', *paths)
        raise
563