xref: /aosp_15_r20/external/cldr/tools/scripts/tr-archive/fix-tocs.js (revision 912701f9769bb47905792267661f0baf2b85bed5)
1// Run this to update the ToCs in the TRs
2
3const { generateGfmToc } = require('@not-dalia/gfm-toc');
4const fs = require('fs').promises;
5const path = require('path');
6
// Directory scanned for source files (a relative path).
const dir = '../../../docs/ldml';
// Only file names that start with "tr35" and end with ".md" are processed.
const trfile = /^tr35.*\.md$/;

// The old ToC starts at the level-2 heading that carries the "Contents" anchor ...
const contentsStart = /^## <a name="Contents".*$/;
// ... and ends at the next level-2 heading.
const contentsEnd = /^## .*$/;

// Patterns for generated ToC entries that must not appear in the output.
const tocDelete = [
    /^[ ]*\* \[.*Unicode Technical Standard.*$/,
    /^[ ]*\* \[_Summary_\].*$/,
    /^[ ]*\* \[_Status_\].*$/,
    /^[ ]*\* \[Parts\].*$/,
    /^[ ]*\* \[Contents of.*$/,
];

// Options passed to generateGfmToc (see the gfm-toc docs for their meaning).
const gfmOpts = {
    includeUnlinked: true,
    createLinks: true,
};
27
/**
 * List the files in `dir` whose names match `trfile`.
 *
 * @returns {Promise<string[]>} one path per matching directory entry, each
 *   built by joining `dir` with the entry name
 */
async function getSrcFiles() {
    const matches = [];
    const handle = await fs.opendir(dir);
    for await (const { name } of handle) {
        // Skip anything that is not one of the wanted files.
        if (!trfile.test(name)) {
            continue;
        }
        matches.push(path.join(dir, name));
    }
    return matches;
}
42
/**
 * Regenerate the table of contents of a single Markdown file and rewrite the
 * file in place.
 *
 * Layout of the rewritten file:
 *  1. every line up to and including the first line matching `contentsStart`;
 *  2. a blank line, the freshly generated ToC, and another blank line;
 *  3. every line from the next line matching `contentsEnd` to the end of the
 *     file, with the copyright end year and tr35 links rewritten.
 * The lines between (1) and (3), i.e. the old ToC, are dropped.
 *
 * @param {String} f path of the file to process
 * @returns {Promise<{name: string, lines: number, toclines: number}>} the base
 *   name of the file, the length of the output array (the whole ToC counts as
 *   a single element) and the number of ToC entries that were kept
 * @throws {Error} if no line matches `contentsStart`, or if no later line
 *   matches `contentsEnd`
 */
async function processFile(f) {
    console.log(`Reading: ${f}`);
    const contents = await fs.readFile(f, 'utf-8');

    // Accept both LF and CRLF input; the output is always joined with LF.
    const lines = contents.split(/(?:\r)?\n/);

    // Locate the header of the old ToC.
    const tocStart = lines.findIndex((line) => contentsStart.test(line));
    if (tocStart === -1) {
        throw new Error(`in ${f}: ran out of lines looking for start of ToC`);
    }

    // Generate the new ToC from the whole document.
    let toc = generateGfmToc(contents, gfmOpts);

    // Drop unwanted entries from the head of the ToC. Each pattern is tried
    // once, in order, against whatever entry is currently first.
    for (const pat of tocDelete) {
        if (pat.test(toc[0])) {
            toc = toc.slice(1);
        }
    }

    // Locate the end of the old ToC: the next section header after its start.
    const tocEnd = lines.findIndex((line, idx) => idx > tocStart && contentsEnd.test(line));
    if (tocEnd === -1) {
        throw new Error(`in ${f}: ran out of lines looking for end of ToC`);
    }

    // The hyphen is escaped so the class is the set {en dash, hyphen, em dash};
    // unescaped, "–-—" would be a range that excludes the ASCII hyphen.
    const oldcopyright = /(Copyright\s*[©]?)\s*([0-9]{4,4})[–\-—](?:[0-9]{4,4})\s*Unicode/;
    const newyear = new Date().getFullYear(); // 2023, etc
    // "(…tr35-foo.html" (optionally with an absolute unicode.org prefix) -> "(tr35-foo.md"
    const badlink = /(\()(http[s]?:\/\/[w\.]*unicode\.org\/reports\/tr35\/)?tr35([^\./]*)\.html/g;
    // "(https://…unicode.org/reports/tr35/#" -> "(tr35.md#"
    const badlink2 = /(\()(http[s]?:\/\/[w\.]*unicode\.org\/reports\/tr35)[\/]?#/g;
    // "(https://…unicode.org/reports/tr35/)" -> "(tr35.md)"
    const badlink3 = /\(http(?:s)?:\/\/(?:www\.)?unicode\.org\/reports\/tr35(?:\/)?\)/g;

    const out = [
        ...lines.slice(0, tocStart + 1), // everything through the ToC header
        '', // blank line before ToC
        toc.join('\n'),
        '',
    ];

    // Copy the rest of the file, fixing the copyright year and links per line.
    for (const line of lines.slice(tocEnd)) {
        out.push(line
            .replace(oldcopyright, `$1 $2–${newyear} Unicode`)
            .replace(badlink, '$1tr35$3.md')
            .replace(badlink2, '$1tr35.md#')
            .replace(badlink3, '(tr35.md)'));
    }

    // Write the whole file to disk.
    await fs.writeFile(f, out.join('\n'), 'utf-8');

    return {
        name: path.basename(f),
        lines: out.length,
        toclines: toc.length,
    };
}
122
// Entry point: process every source file concurrently, then print a summary.
(async () => {
    let summary;
    try {
        const files = await getSrcFiles();
        summary = await Promise.all(files.map((p) => processFile(p)));
    } catch (err) {
        // Failures while listing or processing files are reported, not rethrown.
        console.error(err);
        return;
    }
    console.dir(summary);
})();
128