1"""
Escape the `body` part of the .chm source files to 7-bit ASCII, to fix
display problems on some MBCS Windows systems.
4
5https://bugs.python.org/issue32174
6"""
7
8import pathlib
9import re
10from html.entities import codepoint2name
11
12from sphinx.util.logging import getLogger
13
14# escape the characters which codepoint > 0x7F
15def _process(string):
16    def escape(matchobj):
17        codepoint = ord(matchobj.group(0))
18
19        name = codepoint2name.get(codepoint)
20        if name is None:
21            return '&#%d;' % codepoint
22        else:
23            return '&%s;' % name
24
25    return re.sub(r'[^\x00-\x7F]', escape, string)
26
def escape_for_chm(app, pagename, templatename, context, doctree):
    """Escape the rendered page body to 7-bit ASCII for .chm output.

    Registered by ``setup`` as the handler for the ``html-page-context``
    event.  It does nothing unless the active builder's ``name`` is
    ``'htmlhelp'``.  When ``context`` holds a non-``None`` ``'body'``
    entry, that entry is replaced in place by its escaped form.

    ``pagename``, ``templatename`` and ``doctree`` are part of the event
    signature and are not used here.
    """
    builder_name = getattr(app.builder, 'name', '')
    if builder_name == 'htmlhelp':
        html = context.get('body')
        if html is not None:
            # Rewrite the body so it contains ASCII characters only.
            context['body'] = _process(html)
36
def fixup_keywords(app, exception):
    """Rewrite apostrophe escapes in the generated ``.hhk`` keywords file.

    Registered by ``setup`` as the handler for the ``build-finished``
    event.  It does nothing unless the active builder's ``name`` is
    ``'htmlhelp'`` and the build finished without an exception.

    The file ``<outdir>/<htmlhelp_basename>.hhk`` is read as bytes and
    written back with every hexadecimal apostrophe reference
    (``&#x27;``) replaced by the decimal form (``&#39;``).

    NOTE(review): the two byte literals had been reduced to bare
    apostrophes in this copy of the file, which made the ``replace``
    call receive a single argument and raise ``TypeError``.  They are
    restored to the hex -> decimal pair; presumably the HTML Help
    compiler does not handle the hexadecimal form -- confirm against
    upstream.
    """
    # only works for .chm output
    if getattr(app.builder, 'name', '') != 'htmlhelp' or exception:
        return

    getLogger(__name__).info('fixing HTML escapes in keywords file...')
    outdir = pathlib.Path(app.builder.outdir)
    outname = app.builder.config.htmlhelp_basename
    hhk_path = outdir / (outname + '.hhk')
    # Work on raw bytes so the file's encoding never has to be guessed.
    index = hhk_path.read_bytes()
    hhk_path.write_bytes(index.replace(b'&#x27;', b'&#39;'))
49
def setup(app):
    """Connect this extension's event handlers to *app*.

    Returns the extension metadata dictionary (version string and the
    ``parallel_read_safe`` flag).
    """
    handlers = (
        # `html-page-context` event emitted when the HTML builder has
        # created a context dictionary to render a template with.
        ('html-page-context', escape_for_chm),
        # `build-finished` event emitted when all the files have been
        # output.
        ('build-finished', fixup_keywords),
    )
    for event, callback in handlers:
        app.connect(event, callback)

    metadata = {'version': '1.0', 'parallel_read_safe': True}
    return metadata
59