xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/email/feedparser.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
1*cda5da8dSAndroid Build Coastguard Worker# Copyright (C) 2004-2006 Python Software Foundation
2*cda5da8dSAndroid Build Coastguard Worker# Authors: Baxter, Wouters and Warsaw
3*cda5da8dSAndroid Build Coastguard Worker# Contact: [email protected]
4*cda5da8dSAndroid Build Coastguard Worker
5*cda5da8dSAndroid Build Coastguard Worker"""FeedParser - An email feed parser.
6*cda5da8dSAndroid Build Coastguard Worker
7*cda5da8dSAndroid Build Coastguard WorkerThe feed parser implements an interface for incrementally parsing an email
8*cda5da8dSAndroid Build Coastguard Workermessage, line by line.  This has advantages for certain applications, such as
9*cda5da8dSAndroid Build Coastguard Workerthose reading email messages off a socket.
10*cda5da8dSAndroid Build Coastguard Worker
11*cda5da8dSAndroid Build Coastguard WorkerFeedParser.feed() is the primary interface for pushing new data into the
12*cda5da8dSAndroid Build Coastguard Workerparser.  It returns when there's nothing more it can do with the available
13*cda5da8dSAndroid Build Coastguard Workerdata.  When you have no more data to push into the parser, call .close().
14*cda5da8dSAndroid Build Coastguard WorkerThis completes the parsing and returns the root message object.
15*cda5da8dSAndroid Build Coastguard Worker
16*cda5da8dSAndroid Build Coastguard WorkerThe other advantage of this parser is that it will never raise a parsing
17*cda5da8dSAndroid Build Coastguard Workerexception.  Instead, when it finds something unexpected, it adds a 'defect' to
18*cda5da8dSAndroid Build Coastguard Workerthe current message.  Defects are just instances that live on the message
19*cda5da8dSAndroid Build Coastguard Workerobject's .defects attribute.
20*cda5da8dSAndroid Build Coastguard Worker"""
21*cda5da8dSAndroid Build Coastguard Worker
22*cda5da8dSAndroid Build Coastguard Worker__all__ = ['FeedParser', 'BytesFeedParser']
23*cda5da8dSAndroid Build Coastguard Worker
24*cda5da8dSAndroid Build Coastguard Workerimport re
25*cda5da8dSAndroid Build Coastguard Worker
26*cda5da8dSAndroid Build Coastguard Workerfrom email import errors
27*cda5da8dSAndroid Build Coastguard Workerfrom email._policybase import compat32
28*cda5da8dSAndroid Build Coastguard Workerfrom collections import deque
29*cda5da8dSAndroid Build Coastguard Workerfrom io import StringIO
30*cda5da8dSAndroid Build Coastguard Worker
# Newline patterns.  Each recognises the three line-ending conventions
# '\r\n', '\r' and '\n'.
# NLCRE: the bare alternation, without a capturing group.
NLCRE = re.compile(r'\r\n|\r|\n')
# NLCRE_bol: the same alternation inside a capturing group.
NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
# NLCRE_eol: a newline sequence anchored at the very end of the string (\Z).
# The parser searches with it to find the trailing newline of a preamble or
# epilogue.
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
# NLCRE_crack: same pattern text as NLCRE_bol.
NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
# RFC 2822 section 3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, i.e.
# any character except controls, SP, and ":".
# headerRE accepts a line that starts with 'From ', with a run of ftext
# characters followed by ':', or with a tab/space (a continuation line).
# The octal ranges \041-\071 and \073-\176 are 33-57 and 59-126.
headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
EMPTYSTRING = ''
NL = '\n'

# Unique sentinel returned by BufferedSubFile.readline() when no complete line
# is buffered and the buffer has not been closed; the parser generator yields
# it to signal that it cannot proceed until more data is fed.  Always compared
# by identity.
NeedMoreData = object()
42*cda5da8dSAndroid Build Coastguard Worker
43*cda5da8dSAndroid Build Coastguard Worker
44*cda5da8dSAndroid Build Coastguard Worker
class BufferedSubFile:
    """Line buffer that is filled in chunks and drained one line at a time.

    Text arrives through push() in arbitrary pieces; every completed line is
    queued and handed out by readline().  A stack of line predicates can be
    installed with push_eof_matcher().  Whenever a predicate on that stack
    accepts the next queued line, readline() reports a false EOF (the empty
    string) and leaves the line in the queue.  The parser can therefore treat
    "EOF" as "end of the current message part".
    """

    def __init__(self):
        # Becomes True in close(); from then on an empty queue means a real
        # EOF instead of "need more data".
        self._closed = False
        # False-EOF predicates, most deeply nested last.
        self._eofstack = []
        # Complete lines waiting to be read.
        self._lines = deque()
        # The trailing, not yet terminated piece of pushed text.  This is a
        # text stream and not a list; see issue 22233 for the reason.
        self._partial = StringIO(newline='')

    def push_eof_matcher(self, pred):
        """Install *pred* as the innermost false-EOF predicate."""
        self._eofstack.append(pred)

    def pop_eof_matcher(self):
        """Remove and return the innermost false-EOF predicate."""
        return self._eofstack.pop()

    def _take_partial(self):
        """Return the buffered partial text split into lines, and clear it."""
        stream = self._partial
        stream.seek(0)
        pieces = stream.readlines()
        stream.seek(0)
        stream.truncate()
        return pieces

    def close(self):
        """Queue whatever partial text is left and mark the buffer closed."""
        # A final line without a terminator must not be lost.
        self.pushlines(self._take_partial())
        self._closed = True

    def readline(self):
        """Return the next line, '' at (false) EOF, or NeedMoreData."""
        if self._lines:
            candidate = self._lines.popleft()
            # RFC 2046, section 5.1.2: boundaries of outer levels must be
            # recognised at any depth of nesting, so every predicate is
            # consulted, from the most nested to the least nested.
            if any(matches(candidate)
                   for matches in reversed(self._eofstack)):
                # False EOF: the matching line stays at the head of the queue.
                self._lines.appendleft(candidate)
                return ''
            return candidate
        # Nothing queued: real EOF once closed, otherwise ask for more input.
        return '' if self._closed else NeedMoreData

    def unreadline(self, line):
        """Put *line* back at the head of the queue."""
        assert line is not NeedMoreData
        self._lines.appendleft(line)

    def push(self, data):
        """Add a chunk of text, queueing every line that it completes."""
        self._partial.write(data)
        if not ('\n' in data or '\r' in data):
            # The chunk finished no line; keep accumulating.
            return

        # Split the accumulated text into lines, line endings included.
        pieces = self._take_partial()

        # Only '\n' proves that the last piece is complete.  A piece ending in
        # '\r' may be the first half of a '\r\n' pair split across two chunks
        # (see bugs 1555570 and 1721862), so it goes back to the partial
        # buffer.
        if not pieces[-1].endswith('\n'):
            self._partial.write(pieces.pop())
        self.pushlines(pieces)

    def pushlines(self, lines):
        """Append the complete *lines* to the queue."""
        self._lines.extend(lines)

    def __iter__(self):
        return self

    def __next__(self):
        # Iteration stops at a real or false EOF; NeedMoreData is passed on
        # to the consumer like any other value.
        item = self.readline()
        if item != '':
            return item
        raise StopIteration
134*cda5da8dSAndroid Build Coastguard Worker
135*cda5da8dSAndroid Build Coastguard Worker
136*cda5da8dSAndroid Build Coastguard Workerclass FeedParser:
137*cda5da8dSAndroid Build Coastguard Worker    """A feed-style parser of email."""
138*cda5da8dSAndroid Build Coastguard Worker
139*cda5da8dSAndroid Build Coastguard Worker    def __init__(self, _factory=None, *, policy=compat32):
140*cda5da8dSAndroid Build Coastguard Worker        """_factory is called with no arguments to create a new message obj
141*cda5da8dSAndroid Build Coastguard Worker
142*cda5da8dSAndroid Build Coastguard Worker        The policy keyword specifies a policy object that controls a number of
143*cda5da8dSAndroid Build Coastguard Worker        aspects of the parser's operation.  The default policy maintains
144*cda5da8dSAndroid Build Coastguard Worker        backward compatibility.
145*cda5da8dSAndroid Build Coastguard Worker
146*cda5da8dSAndroid Build Coastguard Worker        """
147*cda5da8dSAndroid Build Coastguard Worker        self.policy = policy
148*cda5da8dSAndroid Build Coastguard Worker        self._old_style_factory = False
149*cda5da8dSAndroid Build Coastguard Worker        if _factory is None:
150*cda5da8dSAndroid Build Coastguard Worker            if policy.message_factory is None:
151*cda5da8dSAndroid Build Coastguard Worker                from email.message import Message
152*cda5da8dSAndroid Build Coastguard Worker                self._factory = Message
153*cda5da8dSAndroid Build Coastguard Worker            else:
154*cda5da8dSAndroid Build Coastguard Worker                self._factory = policy.message_factory
155*cda5da8dSAndroid Build Coastguard Worker        else:
156*cda5da8dSAndroid Build Coastguard Worker            self._factory = _factory
157*cda5da8dSAndroid Build Coastguard Worker            try:
158*cda5da8dSAndroid Build Coastguard Worker                _factory(policy=self.policy)
159*cda5da8dSAndroid Build Coastguard Worker            except TypeError:
160*cda5da8dSAndroid Build Coastguard Worker                # Assume this is an old-style factory
161*cda5da8dSAndroid Build Coastguard Worker                self._old_style_factory = True
162*cda5da8dSAndroid Build Coastguard Worker        self._input = BufferedSubFile()
163*cda5da8dSAndroid Build Coastguard Worker        self._msgstack = []
164*cda5da8dSAndroid Build Coastguard Worker        self._parse = self._parsegen().__next__
165*cda5da8dSAndroid Build Coastguard Worker        self._cur = None
166*cda5da8dSAndroid Build Coastguard Worker        self._last = None
167*cda5da8dSAndroid Build Coastguard Worker        self._headersonly = False
168*cda5da8dSAndroid Build Coastguard Worker
169*cda5da8dSAndroid Build Coastguard Worker    # Non-public interface for supporting Parser's headersonly flag
170*cda5da8dSAndroid Build Coastguard Worker    def _set_headersonly(self):
171*cda5da8dSAndroid Build Coastguard Worker        self._headersonly = True
172*cda5da8dSAndroid Build Coastguard Worker
173*cda5da8dSAndroid Build Coastguard Worker    def feed(self, data):
174*cda5da8dSAndroid Build Coastguard Worker        """Push more data into the parser."""
175*cda5da8dSAndroid Build Coastguard Worker        self._input.push(data)
176*cda5da8dSAndroid Build Coastguard Worker        self._call_parse()
177*cda5da8dSAndroid Build Coastguard Worker
178*cda5da8dSAndroid Build Coastguard Worker    def _call_parse(self):
179*cda5da8dSAndroid Build Coastguard Worker        try:
180*cda5da8dSAndroid Build Coastguard Worker            self._parse()
181*cda5da8dSAndroid Build Coastguard Worker        except StopIteration:
182*cda5da8dSAndroid Build Coastguard Worker            pass
183*cda5da8dSAndroid Build Coastguard Worker
184*cda5da8dSAndroid Build Coastguard Worker    def close(self):
185*cda5da8dSAndroid Build Coastguard Worker        """Parse all remaining data and return the root message object."""
186*cda5da8dSAndroid Build Coastguard Worker        self._input.close()
187*cda5da8dSAndroid Build Coastguard Worker        self._call_parse()
188*cda5da8dSAndroid Build Coastguard Worker        root = self._pop_message()
189*cda5da8dSAndroid Build Coastguard Worker        assert not self._msgstack
190*cda5da8dSAndroid Build Coastguard Worker        # Look for final set of defects
191*cda5da8dSAndroid Build Coastguard Worker        if root.get_content_maintype() == 'multipart' \
192*cda5da8dSAndroid Build Coastguard Worker               and not root.is_multipart():
193*cda5da8dSAndroid Build Coastguard Worker            defect = errors.MultipartInvariantViolationDefect()
194*cda5da8dSAndroid Build Coastguard Worker            self.policy.handle_defect(root, defect)
195*cda5da8dSAndroid Build Coastguard Worker        return root
196*cda5da8dSAndroid Build Coastguard Worker
197*cda5da8dSAndroid Build Coastguard Worker    def _new_message(self):
198*cda5da8dSAndroid Build Coastguard Worker        if self._old_style_factory:
199*cda5da8dSAndroid Build Coastguard Worker            msg = self._factory()
200*cda5da8dSAndroid Build Coastguard Worker        else:
201*cda5da8dSAndroid Build Coastguard Worker            msg = self._factory(policy=self.policy)
202*cda5da8dSAndroid Build Coastguard Worker        if self._cur and self._cur.get_content_type() == 'multipart/digest':
203*cda5da8dSAndroid Build Coastguard Worker            msg.set_default_type('message/rfc822')
204*cda5da8dSAndroid Build Coastguard Worker        if self._msgstack:
205*cda5da8dSAndroid Build Coastguard Worker            self._msgstack[-1].attach(msg)
206*cda5da8dSAndroid Build Coastguard Worker        self._msgstack.append(msg)
207*cda5da8dSAndroid Build Coastguard Worker        self._cur = msg
208*cda5da8dSAndroid Build Coastguard Worker        self._last = msg
209*cda5da8dSAndroid Build Coastguard Worker
210*cda5da8dSAndroid Build Coastguard Worker    def _pop_message(self):
211*cda5da8dSAndroid Build Coastguard Worker        retval = self._msgstack.pop()
212*cda5da8dSAndroid Build Coastguard Worker        if self._msgstack:
213*cda5da8dSAndroid Build Coastguard Worker            self._cur = self._msgstack[-1]
214*cda5da8dSAndroid Build Coastguard Worker        else:
215*cda5da8dSAndroid Build Coastguard Worker            self._cur = None
216*cda5da8dSAndroid Build Coastguard Worker        return retval
217*cda5da8dSAndroid Build Coastguard Worker
218*cda5da8dSAndroid Build Coastguard Worker    def _parsegen(self):
219*cda5da8dSAndroid Build Coastguard Worker        # Create a new message and start by parsing headers.
220*cda5da8dSAndroid Build Coastguard Worker        self._new_message()
221*cda5da8dSAndroid Build Coastguard Worker        headers = []
222*cda5da8dSAndroid Build Coastguard Worker        # Collect the headers, searching for a line that doesn't match the RFC
223*cda5da8dSAndroid Build Coastguard Worker        # 2822 header or continuation pattern (including an empty line).
224*cda5da8dSAndroid Build Coastguard Worker        for line in self._input:
225*cda5da8dSAndroid Build Coastguard Worker            if line is NeedMoreData:
226*cda5da8dSAndroid Build Coastguard Worker                yield NeedMoreData
227*cda5da8dSAndroid Build Coastguard Worker                continue
228*cda5da8dSAndroid Build Coastguard Worker            if not headerRE.match(line):
229*cda5da8dSAndroid Build Coastguard Worker                # If we saw the RFC defined header/body separator
230*cda5da8dSAndroid Build Coastguard Worker                # (i.e. newline), just throw it away. Otherwise the line is
231*cda5da8dSAndroid Build Coastguard Worker                # part of the body so push it back.
232*cda5da8dSAndroid Build Coastguard Worker                if not NLCRE.match(line):
233*cda5da8dSAndroid Build Coastguard Worker                    defect = errors.MissingHeaderBodySeparatorDefect()
234*cda5da8dSAndroid Build Coastguard Worker                    self.policy.handle_defect(self._cur, defect)
235*cda5da8dSAndroid Build Coastguard Worker                    self._input.unreadline(line)
236*cda5da8dSAndroid Build Coastguard Worker                break
237*cda5da8dSAndroid Build Coastguard Worker            headers.append(line)
238*cda5da8dSAndroid Build Coastguard Worker        # Done with the headers, so parse them and figure out what we're
239*cda5da8dSAndroid Build Coastguard Worker        # supposed to see in the body of the message.
240*cda5da8dSAndroid Build Coastguard Worker        self._parse_headers(headers)
241*cda5da8dSAndroid Build Coastguard Worker        # Headers-only parsing is a backwards compatibility hack, which was
242*cda5da8dSAndroid Build Coastguard Worker        # necessary in the older parser, which could raise errors.  All
243*cda5da8dSAndroid Build Coastguard Worker        # remaining lines in the input are thrown into the message body.
244*cda5da8dSAndroid Build Coastguard Worker        if self._headersonly:
245*cda5da8dSAndroid Build Coastguard Worker            lines = []
246*cda5da8dSAndroid Build Coastguard Worker            while True:
247*cda5da8dSAndroid Build Coastguard Worker                line = self._input.readline()
248*cda5da8dSAndroid Build Coastguard Worker                if line is NeedMoreData:
249*cda5da8dSAndroid Build Coastguard Worker                    yield NeedMoreData
250*cda5da8dSAndroid Build Coastguard Worker                    continue
251*cda5da8dSAndroid Build Coastguard Worker                if line == '':
252*cda5da8dSAndroid Build Coastguard Worker                    break
253*cda5da8dSAndroid Build Coastguard Worker                lines.append(line)
254*cda5da8dSAndroid Build Coastguard Worker            self._cur.set_payload(EMPTYSTRING.join(lines))
255*cda5da8dSAndroid Build Coastguard Worker            return
256*cda5da8dSAndroid Build Coastguard Worker        if self._cur.get_content_type() == 'message/delivery-status':
257*cda5da8dSAndroid Build Coastguard Worker            # message/delivery-status contains blocks of headers separated by
258*cda5da8dSAndroid Build Coastguard Worker            # a blank line.  We'll represent each header block as a separate
259*cda5da8dSAndroid Build Coastguard Worker            # nested message object, but the processing is a bit different
260*cda5da8dSAndroid Build Coastguard Worker            # than standard message/* types because there is no body for the
261*cda5da8dSAndroid Build Coastguard Worker            # nested messages.  A blank line separates the subparts.
262*cda5da8dSAndroid Build Coastguard Worker            while True:
263*cda5da8dSAndroid Build Coastguard Worker                self._input.push_eof_matcher(NLCRE.match)
264*cda5da8dSAndroid Build Coastguard Worker                for retval in self._parsegen():
265*cda5da8dSAndroid Build Coastguard Worker                    if retval is NeedMoreData:
266*cda5da8dSAndroid Build Coastguard Worker                        yield NeedMoreData
267*cda5da8dSAndroid Build Coastguard Worker                        continue
268*cda5da8dSAndroid Build Coastguard Worker                    break
269*cda5da8dSAndroid Build Coastguard Worker                msg = self._pop_message()
270*cda5da8dSAndroid Build Coastguard Worker                # We need to pop the EOF matcher in order to tell if we're at
271*cda5da8dSAndroid Build Coastguard Worker                # the end of the current file, not the end of the last block
272*cda5da8dSAndroid Build Coastguard Worker                # of message headers.
273*cda5da8dSAndroid Build Coastguard Worker                self._input.pop_eof_matcher()
274*cda5da8dSAndroid Build Coastguard Worker                # The input stream must be sitting at the newline or at the
275*cda5da8dSAndroid Build Coastguard Worker                # EOF.  We want to see if we're at the end of this subpart, so
276*cda5da8dSAndroid Build Coastguard Worker                # first consume the blank line, then test the next line to see
277*cda5da8dSAndroid Build Coastguard Worker                # if we're at this subpart's EOF.
278*cda5da8dSAndroid Build Coastguard Worker                while True:
279*cda5da8dSAndroid Build Coastguard Worker                    line = self._input.readline()
280*cda5da8dSAndroid Build Coastguard Worker                    if line is NeedMoreData:
281*cda5da8dSAndroid Build Coastguard Worker                        yield NeedMoreData
282*cda5da8dSAndroid Build Coastguard Worker                        continue
283*cda5da8dSAndroid Build Coastguard Worker                    break
284*cda5da8dSAndroid Build Coastguard Worker                while True:
285*cda5da8dSAndroid Build Coastguard Worker                    line = self._input.readline()
286*cda5da8dSAndroid Build Coastguard Worker                    if line is NeedMoreData:
287*cda5da8dSAndroid Build Coastguard Worker                        yield NeedMoreData
288*cda5da8dSAndroid Build Coastguard Worker                        continue
289*cda5da8dSAndroid Build Coastguard Worker                    break
290*cda5da8dSAndroid Build Coastguard Worker                if line == '':
291*cda5da8dSAndroid Build Coastguard Worker                    break
292*cda5da8dSAndroid Build Coastguard Worker                # Not at EOF so this is a line we're going to need.
293*cda5da8dSAndroid Build Coastguard Worker                self._input.unreadline(line)
294*cda5da8dSAndroid Build Coastguard Worker            return
295*cda5da8dSAndroid Build Coastguard Worker        if self._cur.get_content_maintype() == 'message':
296*cda5da8dSAndroid Build Coastguard Worker            # The message claims to be a message/* type, then what follows is
297*cda5da8dSAndroid Build Coastguard Worker            # another RFC 2822 message.
298*cda5da8dSAndroid Build Coastguard Worker            for retval in self._parsegen():
299*cda5da8dSAndroid Build Coastguard Worker                if retval is NeedMoreData:
300*cda5da8dSAndroid Build Coastguard Worker                    yield NeedMoreData
301*cda5da8dSAndroid Build Coastguard Worker                    continue
302*cda5da8dSAndroid Build Coastguard Worker                break
303*cda5da8dSAndroid Build Coastguard Worker            self._pop_message()
304*cda5da8dSAndroid Build Coastguard Worker            return
305*cda5da8dSAndroid Build Coastguard Worker        if self._cur.get_content_maintype() == 'multipart':
306*cda5da8dSAndroid Build Coastguard Worker            boundary = self._cur.get_boundary()
307*cda5da8dSAndroid Build Coastguard Worker            if boundary is None:
308*cda5da8dSAndroid Build Coastguard Worker                # The message /claims/ to be a multipart but it has not
309*cda5da8dSAndroid Build Coastguard Worker                # defined a boundary.  That's a problem which we'll handle by
310*cda5da8dSAndroid Build Coastguard Worker                # reading everything until the EOF and marking the message as
311*cda5da8dSAndroid Build Coastguard Worker                # defective.
312*cda5da8dSAndroid Build Coastguard Worker                defect = errors.NoBoundaryInMultipartDefect()
313*cda5da8dSAndroid Build Coastguard Worker                self.policy.handle_defect(self._cur, defect)
314*cda5da8dSAndroid Build Coastguard Worker                lines = []
315*cda5da8dSAndroid Build Coastguard Worker                for line in self._input:
316*cda5da8dSAndroid Build Coastguard Worker                    if line is NeedMoreData:
317*cda5da8dSAndroid Build Coastguard Worker                        yield NeedMoreData
318*cda5da8dSAndroid Build Coastguard Worker                        continue
319*cda5da8dSAndroid Build Coastguard Worker                    lines.append(line)
320*cda5da8dSAndroid Build Coastguard Worker                self._cur.set_payload(EMPTYSTRING.join(lines))
321*cda5da8dSAndroid Build Coastguard Worker                return
322*cda5da8dSAndroid Build Coastguard Worker            # Make sure a valid content type was specified per RFC 2045:6.4.
323*cda5da8dSAndroid Build Coastguard Worker            if (str(self._cur.get('content-transfer-encoding', '8bit')).lower()
324*cda5da8dSAndroid Build Coastguard Worker                    not in ('7bit', '8bit', 'binary')):
325*cda5da8dSAndroid Build Coastguard Worker                defect = errors.InvalidMultipartContentTransferEncodingDefect()
326*cda5da8dSAndroid Build Coastguard Worker                self.policy.handle_defect(self._cur, defect)
327*cda5da8dSAndroid Build Coastguard Worker            # Create a line match predicate which matches the inter-part
328*cda5da8dSAndroid Build Coastguard Worker            # boundary as well as the end-of-multipart boundary.  Don't push
329*cda5da8dSAndroid Build Coastguard Worker            # this onto the input stream until we've scanned past the
330*cda5da8dSAndroid Build Coastguard Worker            # preamble.
331*cda5da8dSAndroid Build Coastguard Worker            separator = '--' + boundary
332*cda5da8dSAndroid Build Coastguard Worker            boundaryre = re.compile(
333*cda5da8dSAndroid Build Coastguard Worker                '(?P<sep>' + re.escape(separator) +
334*cda5da8dSAndroid Build Coastguard Worker                r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
335*cda5da8dSAndroid Build Coastguard Worker            capturing_preamble = True
336*cda5da8dSAndroid Build Coastguard Worker            preamble = []
337*cda5da8dSAndroid Build Coastguard Worker            linesep = False
338*cda5da8dSAndroid Build Coastguard Worker            close_boundary_seen = False
339*cda5da8dSAndroid Build Coastguard Worker            while True:
340*cda5da8dSAndroid Build Coastguard Worker                line = self._input.readline()
341*cda5da8dSAndroid Build Coastguard Worker                if line is NeedMoreData:
342*cda5da8dSAndroid Build Coastguard Worker                    yield NeedMoreData
343*cda5da8dSAndroid Build Coastguard Worker                    continue
344*cda5da8dSAndroid Build Coastguard Worker                if line == '':
345*cda5da8dSAndroid Build Coastguard Worker                    break
346*cda5da8dSAndroid Build Coastguard Worker                mo = boundaryre.match(line)
347*cda5da8dSAndroid Build Coastguard Worker                if mo:
348*cda5da8dSAndroid Build Coastguard Worker                    # If we're looking at the end boundary, we're done with
349*cda5da8dSAndroid Build Coastguard Worker                    # this multipart.  If there was a newline at the end of
350*cda5da8dSAndroid Build Coastguard Worker                    # the closing boundary, then we need to initialize the
351*cda5da8dSAndroid Build Coastguard Worker                    # epilogue with the empty string (see below).
352*cda5da8dSAndroid Build Coastguard Worker                    if mo.group('end'):
353*cda5da8dSAndroid Build Coastguard Worker                        close_boundary_seen = True
354*cda5da8dSAndroid Build Coastguard Worker                        linesep = mo.group('linesep')
355*cda5da8dSAndroid Build Coastguard Worker                        break
356*cda5da8dSAndroid Build Coastguard Worker                    # We saw an inter-part boundary.  Were we in the preamble?
357*cda5da8dSAndroid Build Coastguard Worker                    if capturing_preamble:
358*cda5da8dSAndroid Build Coastguard Worker                        if preamble:
359*cda5da8dSAndroid Build Coastguard Worker                            # According to RFC 2046, the last newline belongs
360*cda5da8dSAndroid Build Coastguard Worker                            # to the boundary.
361*cda5da8dSAndroid Build Coastguard Worker                            lastline = preamble[-1]
362*cda5da8dSAndroid Build Coastguard Worker                            eolmo = NLCRE_eol.search(lastline)
363*cda5da8dSAndroid Build Coastguard Worker                            if eolmo:
364*cda5da8dSAndroid Build Coastguard Worker                                preamble[-1] = lastline[:-len(eolmo.group(0))]
365*cda5da8dSAndroid Build Coastguard Worker                            self._cur.preamble = EMPTYSTRING.join(preamble)
366*cda5da8dSAndroid Build Coastguard Worker                        capturing_preamble = False
367*cda5da8dSAndroid Build Coastguard Worker                        self._input.unreadline(line)
368*cda5da8dSAndroid Build Coastguard Worker                        continue
369*cda5da8dSAndroid Build Coastguard Worker                    # We saw a boundary separating two parts.  Consume any
370*cda5da8dSAndroid Build Coastguard Worker                    # multiple boundary lines that may be following.  Our
371*cda5da8dSAndroid Build Coastguard Worker                    # interpretation of RFC 2046 BNF grammar does not produce
372*cda5da8dSAndroid Build Coastguard Worker                    # body parts within such double boundaries.
373*cda5da8dSAndroid Build Coastguard Worker                    while True:
374*cda5da8dSAndroid Build Coastguard Worker                        line = self._input.readline()
375*cda5da8dSAndroid Build Coastguard Worker                        if line is NeedMoreData:
376*cda5da8dSAndroid Build Coastguard Worker                            yield NeedMoreData
377*cda5da8dSAndroid Build Coastguard Worker                            continue
378*cda5da8dSAndroid Build Coastguard Worker                        mo = boundaryre.match(line)
379*cda5da8dSAndroid Build Coastguard Worker                        if not mo:
380*cda5da8dSAndroid Build Coastguard Worker                            self._input.unreadline(line)
381*cda5da8dSAndroid Build Coastguard Worker                            break
382*cda5da8dSAndroid Build Coastguard Worker                    # Recurse to parse this subpart; the input stream points
383*cda5da8dSAndroid Build Coastguard Worker                    # at the subpart's first line.
384*cda5da8dSAndroid Build Coastguard Worker                    self._input.push_eof_matcher(boundaryre.match)
385*cda5da8dSAndroid Build Coastguard Worker                    for retval in self._parsegen():
386*cda5da8dSAndroid Build Coastguard Worker                        if retval is NeedMoreData:
387*cda5da8dSAndroid Build Coastguard Worker                            yield NeedMoreData
388*cda5da8dSAndroid Build Coastguard Worker                            continue
389*cda5da8dSAndroid Build Coastguard Worker                        break
390*cda5da8dSAndroid Build Coastguard Worker                    # Because of RFC 2046, the newline preceding the boundary
391*cda5da8dSAndroid Build Coastguard Worker                    # separator actually belongs to the boundary, not the
392*cda5da8dSAndroid Build Coastguard Worker                    # previous subpart's payload (or epilogue if the previous
393*cda5da8dSAndroid Build Coastguard Worker                    # part is a multipart).
394*cda5da8dSAndroid Build Coastguard Worker                    if self._last.get_content_maintype() == 'multipart':
395*cda5da8dSAndroid Build Coastguard Worker                        epilogue = self._last.epilogue
396*cda5da8dSAndroid Build Coastguard Worker                        if epilogue == '':
397*cda5da8dSAndroid Build Coastguard Worker                            self._last.epilogue = None
398*cda5da8dSAndroid Build Coastguard Worker                        elif epilogue is not None:
399*cda5da8dSAndroid Build Coastguard Worker                            mo = NLCRE_eol.search(epilogue)
400*cda5da8dSAndroid Build Coastguard Worker                            if mo:
401*cda5da8dSAndroid Build Coastguard Worker                                end = len(mo.group(0))
402*cda5da8dSAndroid Build Coastguard Worker                                self._last.epilogue = epilogue[:-end]
403*cda5da8dSAndroid Build Coastguard Worker                    else:
404*cda5da8dSAndroid Build Coastguard Worker                        payload = self._last._payload
405*cda5da8dSAndroid Build Coastguard Worker                        if isinstance(payload, str):
406*cda5da8dSAndroid Build Coastguard Worker                            mo = NLCRE_eol.search(payload)
407*cda5da8dSAndroid Build Coastguard Worker                            if mo:
408*cda5da8dSAndroid Build Coastguard Worker                                payload = payload[:-len(mo.group(0))]
409*cda5da8dSAndroid Build Coastguard Worker                                self._last._payload = payload
410*cda5da8dSAndroid Build Coastguard Worker                    self._input.pop_eof_matcher()
411*cda5da8dSAndroid Build Coastguard Worker                    self._pop_message()
412*cda5da8dSAndroid Build Coastguard Worker                    # Set the multipart up for newline cleansing, which will
413*cda5da8dSAndroid Build Coastguard Worker                    # happen if we're in a nested multipart.
414*cda5da8dSAndroid Build Coastguard Worker                    self._last = self._cur
415*cda5da8dSAndroid Build Coastguard Worker                else:
416*cda5da8dSAndroid Build Coastguard Worker                    # I think we must be in the preamble
417*cda5da8dSAndroid Build Coastguard Worker                    assert capturing_preamble
418*cda5da8dSAndroid Build Coastguard Worker                    preamble.append(line)
419*cda5da8dSAndroid Build Coastguard Worker            # We've seen either the EOF or the end boundary.  If we're still
420*cda5da8dSAndroid Build Coastguard Worker            # capturing the preamble, we never saw the start boundary.  Note
421*cda5da8dSAndroid Build Coastguard Worker            # that as a defect and store the captured text as the payload.
422*cda5da8dSAndroid Build Coastguard Worker            if capturing_preamble:
423*cda5da8dSAndroid Build Coastguard Worker                defect = errors.StartBoundaryNotFoundDefect()
424*cda5da8dSAndroid Build Coastguard Worker                self.policy.handle_defect(self._cur, defect)
425*cda5da8dSAndroid Build Coastguard Worker                self._cur.set_payload(EMPTYSTRING.join(preamble))
426*cda5da8dSAndroid Build Coastguard Worker                epilogue = []
427*cda5da8dSAndroid Build Coastguard Worker                for line in self._input:
428*cda5da8dSAndroid Build Coastguard Worker                    if line is NeedMoreData:
429*cda5da8dSAndroid Build Coastguard Worker                        yield NeedMoreData
430*cda5da8dSAndroid Build Coastguard Worker                        continue
431*cda5da8dSAndroid Build Coastguard Worker                self._cur.epilogue = EMPTYSTRING.join(epilogue)
432*cda5da8dSAndroid Build Coastguard Worker                return
433*cda5da8dSAndroid Build Coastguard Worker            # If we're not processing the preamble, then we might have seen
434*cda5da8dSAndroid Build Coastguard Worker            # EOF without seeing that end boundary...that is also a defect.
435*cda5da8dSAndroid Build Coastguard Worker            if not close_boundary_seen:
436*cda5da8dSAndroid Build Coastguard Worker                defect = errors.CloseBoundaryNotFoundDefect()
437*cda5da8dSAndroid Build Coastguard Worker                self.policy.handle_defect(self._cur, defect)
438*cda5da8dSAndroid Build Coastguard Worker                return
439*cda5da8dSAndroid Build Coastguard Worker            # Everything from here to the EOF is epilogue.  If the end boundary
440*cda5da8dSAndroid Build Coastguard Worker            # ended in a newline, we'll need to make sure the epilogue isn't
441*cda5da8dSAndroid Build Coastguard Worker            # None
442*cda5da8dSAndroid Build Coastguard Worker            if linesep:
443*cda5da8dSAndroid Build Coastguard Worker                epilogue = ['']
444*cda5da8dSAndroid Build Coastguard Worker            else:
445*cda5da8dSAndroid Build Coastguard Worker                epilogue = []
446*cda5da8dSAndroid Build Coastguard Worker            for line in self._input:
447*cda5da8dSAndroid Build Coastguard Worker                if line is NeedMoreData:
448*cda5da8dSAndroid Build Coastguard Worker                    yield NeedMoreData
449*cda5da8dSAndroid Build Coastguard Worker                    continue
450*cda5da8dSAndroid Build Coastguard Worker                epilogue.append(line)
451*cda5da8dSAndroid Build Coastguard Worker            # Any CRLF at the front of the epilogue is not technically part of
452*cda5da8dSAndroid Build Coastguard Worker            # the epilogue.  Also, watch out for an empty string epilogue,
453*cda5da8dSAndroid Build Coastguard Worker            # which means a single newline.
454*cda5da8dSAndroid Build Coastguard Worker            if epilogue:
455*cda5da8dSAndroid Build Coastguard Worker                firstline = epilogue[0]
456*cda5da8dSAndroid Build Coastguard Worker                bolmo = NLCRE_bol.match(firstline)
457*cda5da8dSAndroid Build Coastguard Worker                if bolmo:
458*cda5da8dSAndroid Build Coastguard Worker                    epilogue[0] = firstline[len(bolmo.group(0)):]
459*cda5da8dSAndroid Build Coastguard Worker            self._cur.epilogue = EMPTYSTRING.join(epilogue)
460*cda5da8dSAndroid Build Coastguard Worker            return
461*cda5da8dSAndroid Build Coastguard Worker        # Otherwise, it's some non-multipart type, so the entire rest of the
462*cda5da8dSAndroid Build Coastguard Worker        # file contents becomes the payload.
463*cda5da8dSAndroid Build Coastguard Worker        lines = []
464*cda5da8dSAndroid Build Coastguard Worker        for line in self._input:
465*cda5da8dSAndroid Build Coastguard Worker            if line is NeedMoreData:
466*cda5da8dSAndroid Build Coastguard Worker                yield NeedMoreData
467*cda5da8dSAndroid Build Coastguard Worker                continue
468*cda5da8dSAndroid Build Coastguard Worker            lines.append(line)
469*cda5da8dSAndroid Build Coastguard Worker        self._cur.set_payload(EMPTYSTRING.join(lines))
470*cda5da8dSAndroid Build Coastguard Worker
471*cda5da8dSAndroid Build Coastguard Worker    def _parse_headers(self, lines):
472*cda5da8dSAndroid Build Coastguard Worker        # Passed a list of lines that make up the headers for the current msg
473*cda5da8dSAndroid Build Coastguard Worker        lastheader = ''
474*cda5da8dSAndroid Build Coastguard Worker        lastvalue = []
475*cda5da8dSAndroid Build Coastguard Worker        for lineno, line in enumerate(lines):
476*cda5da8dSAndroid Build Coastguard Worker            # Check for continuation
477*cda5da8dSAndroid Build Coastguard Worker            if line[0] in ' \t':
478*cda5da8dSAndroid Build Coastguard Worker                if not lastheader:
479*cda5da8dSAndroid Build Coastguard Worker                    # The first line of the headers was a continuation.  This
480*cda5da8dSAndroid Build Coastguard Worker                    # is illegal, so let's note the defect, store the illegal
481*cda5da8dSAndroid Build Coastguard Worker                    # line, and ignore it for purposes of headers.
482*cda5da8dSAndroid Build Coastguard Worker                    defect = errors.FirstHeaderLineIsContinuationDefect(line)
483*cda5da8dSAndroid Build Coastguard Worker                    self.policy.handle_defect(self._cur, defect)
484*cda5da8dSAndroid Build Coastguard Worker                    continue
485*cda5da8dSAndroid Build Coastguard Worker                lastvalue.append(line)
486*cda5da8dSAndroid Build Coastguard Worker                continue
487*cda5da8dSAndroid Build Coastguard Worker            if lastheader:
488*cda5da8dSAndroid Build Coastguard Worker                self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
489*cda5da8dSAndroid Build Coastguard Worker                lastheader, lastvalue = '', []
490*cda5da8dSAndroid Build Coastguard Worker            # Check for envelope header, i.e. unix-from
491*cda5da8dSAndroid Build Coastguard Worker            if line.startswith('From '):
492*cda5da8dSAndroid Build Coastguard Worker                if lineno == 0:
493*cda5da8dSAndroid Build Coastguard Worker                    # Strip off the trailing newline
494*cda5da8dSAndroid Build Coastguard Worker                    mo = NLCRE_eol.search(line)
495*cda5da8dSAndroid Build Coastguard Worker                    if mo:
496*cda5da8dSAndroid Build Coastguard Worker                        line = line[:-len(mo.group(0))]
497*cda5da8dSAndroid Build Coastguard Worker                    self._cur.set_unixfrom(line)
498*cda5da8dSAndroid Build Coastguard Worker                    continue
499*cda5da8dSAndroid Build Coastguard Worker                elif lineno == len(lines) - 1:
500*cda5da8dSAndroid Build Coastguard Worker                    # Something looking like a unix-from at the end - it's
501*cda5da8dSAndroid Build Coastguard Worker                    # probably the first line of the body, so push back the
502*cda5da8dSAndroid Build Coastguard Worker                    # line and stop.
503*cda5da8dSAndroid Build Coastguard Worker                    self._input.unreadline(line)
504*cda5da8dSAndroid Build Coastguard Worker                    return
505*cda5da8dSAndroid Build Coastguard Worker                else:
506*cda5da8dSAndroid Build Coastguard Worker                    # Weirdly placed unix-from line.  Note this as a defect
507*cda5da8dSAndroid Build Coastguard Worker                    # and ignore it.
508*cda5da8dSAndroid Build Coastguard Worker                    defect = errors.MisplacedEnvelopeHeaderDefect(line)
509*cda5da8dSAndroid Build Coastguard Worker                    self._cur.defects.append(defect)
510*cda5da8dSAndroid Build Coastguard Worker                    continue
511*cda5da8dSAndroid Build Coastguard Worker            # Split the line on the colon separating field name from value.
512*cda5da8dSAndroid Build Coastguard Worker            # There will always be a colon, because if there wasn't the part of
513*cda5da8dSAndroid Build Coastguard Worker            # the parser that calls us would have started parsing the body.
514*cda5da8dSAndroid Build Coastguard Worker            i = line.find(':')
515*cda5da8dSAndroid Build Coastguard Worker
516*cda5da8dSAndroid Build Coastguard Worker            # If the colon is on the start of the line the header is clearly
517*cda5da8dSAndroid Build Coastguard Worker            # malformed, but we might be able to salvage the rest of the
518*cda5da8dSAndroid Build Coastguard Worker            # message. Track the error but keep going.
519*cda5da8dSAndroid Build Coastguard Worker            if i == 0:
520*cda5da8dSAndroid Build Coastguard Worker                defect = errors.InvalidHeaderDefect("Missing header name.")
521*cda5da8dSAndroid Build Coastguard Worker                self._cur.defects.append(defect)
522*cda5da8dSAndroid Build Coastguard Worker                continue
523*cda5da8dSAndroid Build Coastguard Worker
524*cda5da8dSAndroid Build Coastguard Worker            assert i>0, "_parse_headers fed line with no : and no leading WS"
525*cda5da8dSAndroid Build Coastguard Worker            lastheader = line[:i]
526*cda5da8dSAndroid Build Coastguard Worker            lastvalue = [line]
527*cda5da8dSAndroid Build Coastguard Worker        # Done with all the lines, so handle the last header.
528*cda5da8dSAndroid Build Coastguard Worker        if lastheader:
529*cda5da8dSAndroid Build Coastguard Worker            self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
530*cda5da8dSAndroid Build Coastguard Worker
531*cda5da8dSAndroid Build Coastguard Worker
class BytesFeedParser(FeedParser):
    """A FeedParser whose feed() method takes bytes instead of str."""

    def feed(self, data):
        """Decode *data* and push the resulting text into the parser.

        The bytes are decoded as ASCII with the ``surrogateescape`` error
        handler before being handed to ``FeedParser.feed()``.
        """
        text = data.decode('ascii', 'surrogateescape')
        super().feed(text)
537