"""Filename globbing utility."""

import contextlib
import os
import re
import fnmatch
import itertools
import stat
import sys

__all__ = ["glob", "iglob", "escape"]

def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
        include_hidden=False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns by default.

    If `root_dir` is not None, it is joined in front of the pattern for
    all filesystem lookups; the returned paths do not include it.

    If `dir_fd` is not None, it is forwarded as the `dir_fd` argument of
    the underlying os.lstat()/os.stat()/os.open() calls.

    If `include_hidden` is true, the patterns '*', '?', '**' will match hidden
    directories.

    If `recursive` is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd, recursive=recursive,
                      include_hidden=include_hidden))

def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
          include_hidden=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns by default.

    If `root_dir` is not None, it is joined in front of the pattern for
    all filesystem lookups; the yielded paths do not include it.

    If `dir_fd` is not None, it is forwarded as the `dir_fd` argument of
    the underlying os.lstat()/os.stat()/os.open() calls.

    If `include_hidden` is true, the patterns '*', '?', '**' will match hidden
    directories.

    If `recursive` is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    # The audit events are raised eagerly, i.e. when iglob() is called and
    # not when the returned iterator is first advanced.
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)
    if root_dir is not None:
        root_dir = os.fspath(root_dir)
    else:
        # Empty string of the same type (str or bytes) as the pattern.
        root_dir = pathname[:0]
    it = _iglob(pathname, root_dir, dir_fd, recursive, False,
                include_hidden=include_hidden)
    if not pathname or (recursive and _isrecursive(pathname[:2])):
        # A leading '**' makes _glob2() yield an empty name first; drop it,
        # but put back whatever else happened to come first.
        try:
            s = next(it)  # skip empty string
            if s:
                it = itertools.chain((s,), it)
        except StopIteration:
            pass
    return it

def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
           include_hidden=False):
    """Yield the paths matching `pathname`, looked up below `root_dir`.

    The pattern is split into a directory part and a final component.
    The directory part is expanded first (recursively, with `dironly`
    set to true) and the final component is then matched inside every
    resulting directory. Yielded paths never contain `root_dir`.
    """
    dirname, basename = os.path.split(pathname)
    if not has_magic(pathname):
        assert not dironly
        if basename:
            if _lexists(_join(root_dir, pathname), dir_fd):
                yield pathname
        else:
            # Patterns ending with a slash should match only directories
            if _isdir(_join(root_dir, dirname), dir_fd):
                yield pathname
        return
    if not dirname:
        if recursive and _isrecursive(basename):
            yield from _glob2(root_dir, basename, dir_fd, dironly,
                              include_hidden=include_hidden)
        else:
            yield from _glob1(root_dir, basename, dir_fd, dironly,
                              include_hidden=include_hidden)
        return
    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        dirs = _iglob(dirname, root_dir, dir_fd, recursive, True,
                      include_hidden=include_hidden)
    else:
        dirs = [dirname]
    if has_magic(basename):
        if recursive and _isrecursive(basename):
            glob_in_dir = _glob2
        else:
            glob_in_dir = _glob1
    else:
        glob_in_dir = _glob0
    for dirname in dirs:
        for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly,
                                include_hidden=include_hidden):
            yield os.path.join(dirname, name)

# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames.  _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).

def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the entries of `dirname` whose names match `pattern`.

    Hidden entries (names starting with a dot) are dropped unless
    `include_hidden` is true or the pattern itself starts with a dot.
    """
    names = _listdir(dirname, dir_fd, dironly)
    if not include_hidden and not _ishidden(pattern):
        names = (x for x in names if not _ishidden(x))
    return fnmatch.filter(names, pattern)

def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
    """Return `[basename]` if it exists inside `dirname`, else `[]`.

    `dironly` and `include_hidden` are accepted only so that the
    signature is interchangeable with _glob1() and _glob2().
    """
    if basename:
        if _lexists(_join(dirname, basename), dir_fd):
            return [basename]
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        if _isdir(dirname, dir_fd):
            return [basename]
    return []

# Following functions are not public but can be used by third-party code.

def glob0(dirname, pattern):
    """Call _glob0() without a dir_fd and with `dironly` false."""
    return _glob0(dirname, pattern, None, False)

def glob1(dirname, pattern):
    """Call _glob1() without a dir_fd and with `dironly` false."""
    return _glob1(dirname, pattern, None, False)

# This helper function recursively yields relative pathnames inside a literal
# directory.

def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Yield the empty name and then every relative path below `dirname`.

    The empty name stands for `dirname` itself ("zero directories").
    """
    assert _isrecursive(pattern)
    # Only report the directory itself when it really is a directory;
    # otherwise 'missing/**' or 'file.txt/**' would produce a bogus match.
    # An empty dirname is left unchecked.
    if not dirname or _isdir(dirname, dir_fd):
        yield pattern[:0]
    yield from _rlistdir(dirname, dir_fd, dironly,
                         include_hidden=include_hidden)

# If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names.
def _iterdir(dirname, dir_fd, dironly):
    """Yield the entry names of `dirname`; yield nothing if it is unreadable.

    Names are bytes when `dirname` is bytes and str otherwise. Any
    OSError raised while opening or scanning the directory ends the
    iteration silently.
    """
    try:
        fd = None
        fsencode = None
        if dir_fd is not None:
            if dirname:
                # Open the directory relative to dir_fd and scan the new
                # descriptor; it is closed in the `finally` clause below.
                fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
            else:
                arg = dir_fd
            if isinstance(dirname, bytes):
                # Encode each name so that it matches the type of dirname.
                fsencode = os.fsencode
        elif dirname:
            arg = dirname
        elif isinstance(dirname, bytes):
            arg = bytes(os.curdir, 'ASCII')
        else:
            arg = os.curdir
        try:
            with os.scandir(arg) as it:
                for entry in it:
                    try:
                        if not dironly or entry.is_dir():
                            if fsencode is not None:
                                yield fsencode(entry.name)
                            else:
                                yield entry.name
                    except OSError:
                        # Best effort: skip entries that cannot be examined.
                        pass
        finally:
            if fd is not None:
                os.close(fd)
    except OSError:
        return

def _listdir(dirname, dir_fd, dironly):
    """Return the names produced by _iterdir() as a list."""
    # closing() makes sure the generator (and with it the scandir iterator
    # and any descriptor it opened) is finalized right away.
    with contextlib.closing(_iterdir(dirname, dir_fd, dironly)) as it:
        return list(it)

# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Yield every path below `dirname`, relative to `dirname`.

    Hidden entries are neither yielded nor descended into unless
    `include_hidden` is true.
    """
    names = _listdir(dirname, dir_fd, dironly)
    for x in names:
        if include_hidden or not _ishidden(x):
            yield x
            path = _join(dirname, x) if dirname else x
            # Entries that are not directories simply list as empty.
            for y in _rlistdir(path, dir_fd, dironly,
                               include_hidden=include_hidden):
                yield _join(x, y)


def _lexists(pathname, dir_fd):
    # Same as os.path.lexists(), but with dir_fd
    if dir_fd is None:
        return os.path.lexists(pathname)
    try:
        os.lstat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    else:
        return True

def _isdir(pathname, dir_fd):
    # Same as os.path.isdir(), but with dir_fd
    if dir_fd is None:
        return os.path.isdir(pathname)
    try:
        st = os.stat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    else:
        return stat.S_ISDIR(st.st_mode)

def _join(dirname, basename):
    # It is common if dirname or basename is empty
    if not dirname or not basename:
        return dirname or basename
    return os.path.join(dirname, basename)

magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')

def has_magic(s):
    """Return True if `s` (str or bytes) contains any of '*', '?' or '['."""
    if isinstance(s, bytes):
        match = magic_check_bytes.search(s)
    else:
        match = magic_check.search(s)
    return match is not None

def _ishidden(path):
    """Return True if `path` (str or bytes) starts with a dot."""
    # Indexing bytes gives an int, hence the comparison against b'.'[0].
    # An empty name is never hidden (and must not raise IndexError).
    return bool(path) and path[0] in ('.', b'.'[0])

def _isrecursive(pattern):
    """Return True if `pattern` is exactly '**' (str or bytes)."""
    if isinstance(pattern, bytes):
        return pattern == b'**'
    else:
        return pattern == '**'

def escape(pathname):
    """Escape all special characters.
    """
    # Escaping is done by wrapping any of "*?[" between square brackets.
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, pathname = os.path.splitdrive(pathname)
    if isinstance(pathname, bytes):
        pathname = magic_check_bytes.sub(br'[\1]', pathname)
    else:
        pathname = magic_check.sub(r'[\1]', pathname)
    return drive + pathname


# O_DIRECTORY is not available on every platform; fall back to no extra flag.
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)