# ext/extract.py
# Copyright 2006-2023 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

from io import BytesIO
from io import StringIO
import re

from mako import lexer
from mako import parsetree


class MessageExtractor:
    """Walk a parsed Mako template and extract messages from the Python
    code embedded in it.

    This class reads ``self.config`` (the ``"encoding"`` and
    ``"comment-tags"`` keys) and calls ``self.process_python``; neither is
    defined here, so both are presumably supplied by a subclass or mixin.
    """

    # When true, code is encoded to bytes and handed to ``process_python``
    # wrapped in a ``BytesIO``; otherwise it is wrapped in a ``StringIO``.
    use_bytes = True

    def process_file(self, fileobj):
        """Parse the template read from ``fileobj`` and yield its messages.

        The whole of ``fileobj.read()`` is lexed using the configured
        encoding, and the children of the resulting template node are
        passed to :meth:`extract_nodes`.
        """
        template_node = lexer.Lexer(
            fileobj.read(), input_encoding=self.config["encoding"]
        ).parse()
        yield from self.extract_nodes(template_node.get_children())

    def extract_nodes(self, nodes):
        """Yield the messages found in ``nodes`` and, recursively, in the
        child nodes of any tag that has them.

        For every node that carries Python code, the code is passed to
        ``self.process_python`` together with the line number and the text
        of any translator comments collected just before it; whatever that
        call yields is yielded unchanged.

        A template comment starts a run of translator comments when it
        begins with one of the whitespace-separated tags in
        ``self.config["comment-tags"]``; following comments are appended to
        the run until it is consumed or reset.
        """
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config["encoding"] or "ascii"
        # A tuple, so that a single ``str.startswith`` call tests every tag
        # and a comment matching several tags is recorded only once.
        comment_tags = tuple(
            filter(None, re.split(r"\s+", self.config["comment-tags"]))
        )

        for node in nodes:
            child_nodes = None
            if (
                in_translator_comments
                and isinstance(node, parsetree.Text)
                and not node.content.strip()
            ):
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments or value.startswith(comment_tags):
                    in_translator_comments = True
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                # Comment nodes never carry code to extract from
                continue

            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the message
            if (
                translator_comments
                and translator_comments[-1][0] < node.lineno - 1
            ):
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments
            ]

            if isinstance(code, str) and self.use_bytes:
                code = code.encode(input_encoding, "backslashreplace")

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            if self.use_bytes:
                code = BytesIO(b"\n" + code)
            else:
                code = StringIO("\n" + code)

            for message in self.process_python(
                code, node.lineno - 1, translator_strings
            ):
                yield message
                used_translator_comments = True

            # Collected comments are consumed by the first node that
            # actually produced a message
            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                yield from self.extract_nodes(child_nodes)

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line"""
        return [
            (lineno + index, line)
            for index, line in enumerate(comment.splitlines())
        ]