1"""Find modules used by a script, using introspection.""" 2 3import dis 4import importlib._bootstrap_external 5import importlib.machinery 6import marshal 7import os 8import io 9import sys 10 11# Old imp constants: 12 13_SEARCH_ERROR = 0 14_PY_SOURCE = 1 15_PY_COMPILED = 2 16_C_EXTENSION = 3 17_PKG_DIRECTORY = 5 18_C_BUILTIN = 6 19_PY_FROZEN = 7 20 21# Modulefinder does a good job at simulating Python's, but it can not 22# handle __path__ modifications packages make at runtime. Therefore there 23# is a mechanism whereby you can register extra paths in this map for a 24# package, and it will be honored. 25 26# Note this is a mapping is lists of paths. 27packagePathMap = {} 28 29# A Public interface 30def AddPackagePath(packagename, path): 31 packagePathMap.setdefault(packagename, []).append(path) 32 33replacePackageMap = {} 34 35# This ReplacePackage mechanism allows modulefinder to work around 36# situations in which a package injects itself under the name 37# of another package into sys.modules at runtime by calling 38# ReplacePackage("real_package_name", "faked_package_name") 39# before running ModuleFinder. 40 41def ReplacePackage(oldname, newname): 42 replacePackageMap[oldname] = newname 43 44 45def _find_module(name, path=None): 46 """An importlib reimplementation of imp.find_module (for our purposes).""" 47 48 # It's necessary to clear the caches for our Finder first, in case any 49 # modules are being added/deleted/modified at runtime. In particular, 50 # test_modulefinder.py changes file tree contents in a cache-breaking way: 51 52 importlib.machinery.PathFinder.invalidate_caches() 53 54 spec = importlib.machinery.PathFinder.find_spec(name, path) 55 56 if spec is None: 57 raise ImportError("No module named {name!r}".format(name=name), name=name) 58 59 # Some special cases: 60 61 if spec.loader is importlib.machinery.BuiltinImporter: 62 return None, None, ("", "", _C_BUILTIN) 63 64 if spec.loader is importlib.machinery.FrozenImporter: 65 return None, None, ("", "", _PY_FROZEN) 66 67 file_path = spec.origin 68 69 if spec.loader.is_package(name): 70 return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY) 71 72 if isinstance(spec.loader, importlib.machinery.SourceFileLoader): 73 kind = _PY_SOURCE 74 75 elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader): 76 kind = _C_EXTENSION 77 78 elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): 79 kind = _PY_COMPILED 80 81 else: # Should never happen. 82 return None, None, ("", "", _SEARCH_ERROR) 83 84 file = io.open_code(file_path) 85 suffix = os.path.splitext(file_path)[-1] 86 87 return file, file_path, (suffix, "rb", kind) 88 89 90class Module: 91 92 def __init__(self, name, file=None, path=None): 93 self.__name__ = name 94 self.__file__ = file 95 self.__path__ = path 96 self.__code__ = None 97 # The set of global names that are assigned to in the module. 98 # This includes those names imported through starimports of 99 # Python modules. 100 self.globalnames = {} 101 # The set of starimports this module did that could not be 102 # resolved, ie. a starimport from a non-Python module. 103 self.starimports = {} 104 105 def __repr__(self): 106 s = "Module(%r" % (self.__name__,) 107 if self.__file__ is not None: 108 s = s + ", %r" % (self.__file__,) 109 if self.__path__ is not None: 110 s = s + ", %r" % (self.__path__,) 111 s = s + ")" 112 return s 113 114class ModuleFinder: 115 116 def __init__(self, path=None, debug=0, excludes=None, replace_paths=None): 117 if path is None: 118 path = sys.path 119 self.path = path 120 self.modules = {} 121 self.badmodules = {} 122 self.debug = debug 123 self.indent = 0 124 self.excludes = excludes if excludes is not None else [] 125 self.replace_paths = replace_paths if replace_paths is not None else [] 126 self.processed_paths = [] # Used in debugging only 127 128 def msg(self, level, str, *args): 129 if level <= self.debug: 130 for i in range(self.indent): 131 print(" ", end=' ') 132 print(str, end=' ') 133 for arg in args: 134 print(repr(arg), end=' ') 135 print() 136 137 def msgin(self, *args): 138 level = args[0] 139 if level <= self.debug: 140 self.indent = self.indent + 1 141 self.msg(*args) 142 143 def msgout(self, *args): 144 level = args[0] 145 if level <= self.debug: 146 self.indent = self.indent - 1 147 self.msg(*args) 148 149 def run_script(self, pathname): 150 self.msg(2, "run_script", pathname) 151 with io.open_code(pathname) as fp: 152 stuff = ("", "rb", _PY_SOURCE) 153 self.load_module('__main__', fp, pathname, stuff) 154 155 def load_file(self, pathname): 156 dir, name = os.path.split(pathname) 157 name, ext = os.path.splitext(name) 158 with io.open_code(pathname) as fp: 159 stuff = (ext, "rb", _PY_SOURCE) 160 self.load_module(name, fp, pathname, stuff) 161 162 def import_hook(self, name, caller=None, fromlist=None, level=-1): 163 self.msg(3, "import_hook", name, caller, fromlist, level) 164 parent = self.determine_parent(caller, level=level) 165 q, tail = self.find_head_package(parent, name) 166 m = self.load_tail(q, tail) 167 if not fromlist: 168 return q 169 if m.__path__: 170 self.ensure_fromlist(m, fromlist) 171 return None 172 173 def determine_parent(self, caller, level=-1): 174 self.msgin(4, "determine_parent", caller, level) 175 if not caller or level == 0: 176 self.msgout(4, "determine_parent -> None") 177 return None 178 pname = caller.__name__ 179 if level >= 1: # relative import 180 if caller.__path__: 181 level -= 1 182 if level == 0: 183 parent = self.modules[pname] 184 assert parent is caller 185 self.msgout(4, "determine_parent ->", parent) 186 return parent 187 if pname.count(".") < level: 188 raise ImportError("relative importpath too deep") 189 pname = ".".join(pname.split(".")[:-level]) 190 parent = self.modules[pname] 191 self.msgout(4, "determine_parent ->", parent) 192 return parent 193 if caller.__path__: 194 parent = self.modules[pname] 195 assert caller is parent 196 self.msgout(4, "determine_parent ->", parent) 197 return parent 198 if '.' in pname: 199 i = pname.rfind('.') 200 pname = pname[:i] 201 parent = self.modules[pname] 202 assert parent.__name__ == pname 203 self.msgout(4, "determine_parent ->", parent) 204 return parent 205 self.msgout(4, "determine_parent -> None") 206 return None 207 208 def find_head_package(self, parent, name): 209 self.msgin(4, "find_head_package", parent, name) 210 if '.' in name: 211 i = name.find('.') 212 head = name[:i] 213 tail = name[i+1:] 214 else: 215 head = name 216 tail = "" 217 if parent: 218 qname = "%s.%s" % (parent.__name__, head) 219 else: 220 qname = head 221 q = self.import_module(head, qname, parent) 222 if q: 223 self.msgout(4, "find_head_package ->", (q, tail)) 224 return q, tail 225 if parent: 226 qname = head 227 parent = None 228 q = self.import_module(head, qname, parent) 229 if q: 230 self.msgout(4, "find_head_package ->", (q, tail)) 231 return q, tail 232 self.msgout(4, "raise ImportError: No module named", qname) 233 raise ImportError("No module named " + qname) 234 235 def load_tail(self, q, tail): 236 self.msgin(4, "load_tail", q, tail) 237 m = q 238 while tail: 239 i = tail.find('.') 240 if i < 0: i = len(tail) 241 head, tail = tail[:i], tail[i+1:] 242 mname = "%s.%s" % (m.__name__, head) 243 m = self.import_module(head, mname, m) 244 if not m: 245 self.msgout(4, "raise ImportError: No module named", mname) 246 raise ImportError("No module named " + mname) 247 self.msgout(4, "load_tail ->", m) 248 return m 249 250 def ensure_fromlist(self, m, fromlist, recursive=0): 251 self.msg(4, "ensure_fromlist", m, fromlist, recursive) 252 for sub in fromlist: 253 if sub == "*": 254 if not recursive: 255 all = self.find_all_submodules(m) 256 if all: 257 self.ensure_fromlist(m, all, 1) 258 elif not hasattr(m, sub): 259 subname = "%s.%s" % (m.__name__, sub) 260 submod = self.import_module(sub, subname, m) 261 if not submod: 262 raise ImportError("No module named " + subname) 263 264 def find_all_submodules(self, m): 265 if not m.__path__: 266 return 267 modules = {} 268 # 'suffixes' used to be a list hardcoded to [".py", ".pyc"]. 269 # But we must also collect Python extension modules - although 270 # we cannot separate normal dlls from Python extensions. 271 suffixes = [] 272 suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] 273 suffixes += importlib.machinery.SOURCE_SUFFIXES[:] 274 suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] 275 for dir in m.__path__: 276 try: 277 names = os.listdir(dir) 278 except OSError: 279 self.msg(2, "can't list directory", dir) 280 continue 281 for name in names: 282 mod = None 283 for suff in suffixes: 284 n = len(suff) 285 if name[-n:] == suff: 286 mod = name[:-n] 287 break 288 if mod and mod != "__init__": 289 modules[mod] = mod 290 return modules.keys() 291 292 def import_module(self, partname, fqname, parent): 293 self.msgin(3, "import_module", partname, fqname, parent) 294 try: 295 m = self.modules[fqname] 296 except KeyError: 297 pass 298 else: 299 self.msgout(3, "import_module ->", m) 300 return m 301 if fqname in self.badmodules: 302 self.msgout(3, "import_module -> None") 303 return None 304 if parent and parent.__path__ is None: 305 self.msgout(3, "import_module -> None") 306 return None 307 try: 308 fp, pathname, stuff = self.find_module(partname, 309 parent and parent.__path__, parent) 310 except ImportError: 311 self.msgout(3, "import_module ->", None) 312 return None 313 314 try: 315 m = self.load_module(fqname, fp, pathname, stuff) 316 finally: 317 if fp: 318 fp.close() 319 if parent: 320 setattr(parent, partname, m) 321 self.msgout(3, "import_module ->", m) 322 return m 323 324 def load_module(self, fqname, fp, pathname, file_info): 325 suffix, mode, type = file_info 326 self.msgin(2, "load_module", fqname, fp and "fp", pathname) 327 if type == _PKG_DIRECTORY: 328 m = self.load_package(fqname, pathname) 329 self.msgout(2, "load_module ->", m) 330 return m 331 if type == _PY_SOURCE: 332 co = compile(fp.read(), pathname, 'exec') 333 elif type == _PY_COMPILED: 334 try: 335 data = fp.read() 336 importlib._bootstrap_external._classify_pyc(data, fqname, {}) 337 except ImportError as exc: 338 self.msgout(2, "raise ImportError: " + str(exc), pathname) 339 raise 340 co = marshal.loads(memoryview(data)[16:]) 341 else: 342 co = None 343 m = self.add_module(fqname) 344 m.__file__ = pathname 345 if co: 346 if self.replace_paths: 347 co = self.replace_paths_in_code(co) 348 m.__code__ = co 349 self.scan_code(co, m) 350 self.msgout(2, "load_module ->", m) 351 return m 352 353 def _add_badmodule(self, name, caller): 354 if name not in self.badmodules: 355 self.badmodules[name] = {} 356 if caller: 357 self.badmodules[name][caller.__name__] = 1 358 else: 359 self.badmodules[name]["-"] = 1 360 361 def _safe_import_hook(self, name, caller, fromlist, level=-1): 362 # wrapper for self.import_hook() that won't raise ImportError 363 if name in self.badmodules: 364 self._add_badmodule(name, caller) 365 return 366 try: 367 self.import_hook(name, caller, level=level) 368 except ImportError as msg: 369 self.msg(2, "ImportError:", str(msg)) 370 self._add_badmodule(name, caller) 371 except SyntaxError as msg: 372 self.msg(2, "SyntaxError:", str(msg)) 373 self._add_badmodule(name, caller) 374 else: 375 if fromlist: 376 for sub in fromlist: 377 fullname = name + "." + sub 378 if fullname in self.badmodules: 379 self._add_badmodule(fullname, caller) 380 continue 381 try: 382 self.import_hook(name, caller, [sub], level=level) 383 except ImportError as msg: 384 self.msg(2, "ImportError:", str(msg)) 385 self._add_badmodule(fullname, caller) 386 387 def scan_opcodes(self, co): 388 # Scan the code, and yield 'interesting' opcode combinations 389 for name in dis._find_store_names(co): 390 yield "store", (name,) 391 for name, level, fromlist in dis._find_imports(co): 392 if level == 0: # absolute import 393 yield "absolute_import", (fromlist, name) 394 else: # relative import 395 yield "relative_import", (level, fromlist, name) 396 397 def scan_code(self, co, m): 398 code = co.co_code 399 scanner = self.scan_opcodes 400 for what, args in scanner(co): 401 if what == "store": 402 name, = args 403 m.globalnames[name] = 1 404 elif what == "absolute_import": 405 fromlist, name = args 406 have_star = 0 407 if fromlist is not None: 408 if "*" in fromlist: 409 have_star = 1 410 fromlist = [f for f in fromlist if f != "*"] 411 self._safe_import_hook(name, m, fromlist, level=0) 412 if have_star: 413 # We've encountered an "import *". If it is a Python module, 414 # the code has already been parsed and we can suck out the 415 # global names. 416 mm = None 417 if m.__path__: 418 # At this point we don't know whether 'name' is a 419 # submodule of 'm' or a global module. Let's just try 420 # the full name first. 421 mm = self.modules.get(m.__name__ + "." + name) 422 if mm is None: 423 mm = self.modules.get(name) 424 if mm is not None: 425 m.globalnames.update(mm.globalnames) 426 m.starimports.update(mm.starimports) 427 if mm.__code__ is None: 428 m.starimports[name] = 1 429 else: 430 m.starimports[name] = 1 431 elif what == "relative_import": 432 level, fromlist, name = args 433 if name: 434 self._safe_import_hook(name, m, fromlist, level=level) 435 else: 436 parent = self.determine_parent(m, level=level) 437 self._safe_import_hook(parent.__name__, None, fromlist, level=0) 438 else: 439 # We don't expect anything else from the generator. 440 raise RuntimeError(what) 441 442 for c in co.co_consts: 443 if isinstance(c, type(co)): 444 self.scan_code(c, m) 445 446 def load_package(self, fqname, pathname): 447 self.msgin(2, "load_package", fqname, pathname) 448 newname = replacePackageMap.get(fqname) 449 if newname: 450 fqname = newname 451 m = self.add_module(fqname) 452 m.__file__ = pathname 453 m.__path__ = [pathname] 454 455 # As per comment at top of file, simulate runtime __path__ additions. 456 m.__path__ = m.__path__ + packagePathMap.get(fqname, []) 457 458 fp, buf, stuff = self.find_module("__init__", m.__path__) 459 try: 460 self.load_module(fqname, fp, buf, stuff) 461 self.msgout(2, "load_package ->", m) 462 return m 463 finally: 464 if fp: 465 fp.close() 466 467 def add_module(self, fqname): 468 if fqname in self.modules: 469 return self.modules[fqname] 470 self.modules[fqname] = m = Module(fqname) 471 return m 472 473 def find_module(self, name, path, parent=None): 474 if parent is not None: 475 # assert path is not None 476 fullname = parent.__name__+'.'+name 477 else: 478 fullname = name 479 if fullname in self.excludes: 480 self.msgout(3, "find_module -> Excluded", fullname) 481 raise ImportError(name) 482 483 if path is None: 484 if name in sys.builtin_module_names: 485 return (None, None, ("", "", _C_BUILTIN)) 486 487 path = self.path 488 489 return _find_module(name, path) 490 491 def report(self): 492 """Print a report to stdout, listing the found modules with their 493 paths, as well as modules that are missing, or seem to be missing. 494 """ 495 print() 496 print(" %-25s %s" % ("Name", "File")) 497 print(" %-25s %s" % ("----", "----")) 498 # Print modules found 499 keys = sorted(self.modules.keys()) 500 for key in keys: 501 m = self.modules[key] 502 if m.__path__: 503 print("P", end=' ') 504 else: 505 print("m", end=' ') 506 print("%-25s" % key, m.__file__ or "") 507 508 # Print missing modules 509 missing, maybe = self.any_missing_maybe() 510 if missing: 511 print() 512 print("Missing modules:") 513 for name in missing: 514 mods = sorted(self.badmodules[name].keys()) 515 print("?", name, "imported from", ', '.join(mods)) 516 # Print modules that may be missing, but then again, maybe not... 517 if maybe: 518 print() 519 print("Submodules that appear to be missing, but could also be", end=' ') 520 print("global names in the parent package:") 521 for name in maybe: 522 mods = sorted(self.badmodules[name].keys()) 523 print("?", name, "imported from", ', '.join(mods)) 524 525 def any_missing(self): 526 """Return a list of modules that appear to be missing. Use 527 any_missing_maybe() if you want to know which modules are 528 certain to be missing, and which *may* be missing. 529 """ 530 missing, maybe = self.any_missing_maybe() 531 return missing + maybe 532 533 def any_missing_maybe(self): 534 """Return two lists, one with modules that are certainly missing 535 and one with modules that *may* be missing. The latter names could 536 either be submodules *or* just global names in the package. 537 538 The reason it can't always be determined is that it's impossible to 539 tell which names are imported when "from module import *" is done 540 with an extension module, short of actually importing it. 541 """ 542 missing = [] 543 maybe = [] 544 for name in self.badmodules: 545 if name in self.excludes: 546 continue 547 i = name.rfind(".") 548 if i < 0: 549 missing.append(name) 550 continue 551 subname = name[i+1:] 552 pkgname = name[:i] 553 pkg = self.modules.get(pkgname) 554 if pkg is not None: 555 if pkgname in self.badmodules[name]: 556 # The package tried to import this module itself and 557 # failed. It's definitely missing. 558 missing.append(name) 559 elif subname in pkg.globalnames: 560 # It's a global in the package: definitely not missing. 561 pass 562 elif pkg.starimports: 563 # It could be missing, but the package did an "import *" 564 # from a non-Python module, so we simply can't be sure. 565 maybe.append(name) 566 else: 567 # It's not a global in the package, the package didn't 568 # do funny star imports, it's very likely to be missing. 569 # The symbol could be inserted into the package from the 570 # outside, but since that's not good style we simply list 571 # it missing. 572 missing.append(name) 573 else: 574 missing.append(name) 575 missing.sort() 576 maybe.sort() 577 return missing, maybe 578 579 def replace_paths_in_code(self, co): 580 new_filename = original_filename = os.path.normpath(co.co_filename) 581 for f, r in self.replace_paths: 582 if original_filename.startswith(f): 583 new_filename = r + original_filename[len(f):] 584 break 585 586 if self.debug and original_filename not in self.processed_paths: 587 if new_filename != original_filename: 588 self.msgout(2, "co_filename %r changed to %r" \ 589 % (original_filename,new_filename,)) 590 else: 591 self.msgout(2, "co_filename %r remains unchanged" \ 592 % (original_filename,)) 593 self.processed_paths.append(original_filename) 594 595 consts = list(co.co_consts) 596 for i in range(len(consts)): 597 if isinstance(consts[i], type(co)): 598 consts[i] = self.replace_paths_in_code(consts[i]) 599 600 return co.replace(co_consts=tuple(consts), co_filename=new_filename) 601 602 603def test(): 604 # Parse command line 605 import getopt 606 try: 607 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") 608 except getopt.error as msg: 609 print(msg) 610 return 611 612 # Process options 613 debug = 1 614 domods = 0 615 addpath = [] 616 exclude = [] 617 for o, a in opts: 618 if o == '-d': 619 debug = debug + 1 620 if o == '-m': 621 domods = 1 622 if o == '-p': 623 addpath = addpath + a.split(os.pathsep) 624 if o == '-q': 625 debug = 0 626 if o == '-x': 627 exclude.append(a) 628 629 # Provide default arguments 630 if not args: 631 script = "hello.py" 632 else: 633 script = args[0] 634 635 # Set the path based on sys.path and the script directory 636 path = sys.path[:] 637 path[0] = os.path.dirname(script) 638 path = addpath + path 639 if debug > 1: 640 print("path:") 641 for item in path: 642 print(" ", repr(item)) 643 644 # Create the module finder and turn its crank 645 mf = ModuleFinder(path, debug, exclude) 646 for arg in args[1:]: 647 if arg == '-m': 648 domods = 1 649 continue 650 if domods: 651 if arg[-2:] == '.*': 652 mf.import_hook(arg[:-2], None, ["*"]) 653 else: 654 mf.import_hook(arg) 655 else: 656 mf.load_file(arg) 657 mf.run_script(script) 658 mf.report() 659 return mf # for -i debugging 660 661 662if __name__ == '__main__': 663 try: 664 mf = test() 665 except KeyboardInterrupt: 666 print("\n[interrupted]") 667