// Source: /aosp_15_r20/external/cldr/tools/scripts/tr-archive/fix-tocs.js (revision 912701f9769bb47905792267661f0baf2b85bed5)
// Run this to update the ToCs (tables of contents) in the TRs (the tr35*.md files).

const { generateGfmToc } = require('@not-dalia/gfm-toc');
const fs = require('fs').promises;
const path = require('path');

// Directory that is scanned for TR sources. It is a relative path, so it is
// resolved against the working directory the script is started from.
const dir = '../../../docs/ldml';
// File names to process: anything that starts with "tr35" and ends in ".md".
const trfile = /^tr35.*\.md$/;

const contentsStart = /^## <a name="Contents".*$/;  // beginning of contents: always has #Contents
const contentsEnd = /^## .*$/; // end of contents: the next ##

// ToC entries we don't want, for deletion.
// Each pattern matches one whole bullet line ("* [...]", optionally indented
// with spaces). processFile() tests them, in this order, against the first
// entry of the generated ToC.
const tocDelete = [
    /^[ ]*\* \[.*Unicode Technical Standard.*$/,
    /^[ ]*\* \[_Summary_\].*$/,
    /^[ ]*\* \[_Status_\].*$/,
    /^[ ]*\* \[Parts\].*$/,
    /^[ ]*\* \[Contents of.*$/
];

// Options handed unchanged to generateGfmToc() as its second argument.
// NOTE(review): what each flag means is defined by the @not-dalia/gfm-toc
// package, not by this file - see gfm-toc docs before changing them.
const gfmOpts = {
    includeUnlinked: true,
    createLinks: true,
};

/**
 * List the files in `dir` whose name matches `trfile`.
 *
 * Entries are matched by name only, and each result is `dir` joined with the
 * entry name. The list is sorted so that the processing order and the final
 * report do not depend on the order in which the directory is iterated.
 *
 * @returns {Promise<string[]>} promise to array of source files
 */
async function getSrcFiles() {
    const found = [];
    const listing = await fs.opendir(dir);
    for await (const entry of listing) {
        if (trfile.test(entry.name)) {
            found.push(path.join(dir, entry.name));
        }
    }
    return found.sort();
}

/**
 * Process a single file: replace its table of contents with a freshly
 * generated one and normalize the copyright range and TR35 links after it.
 *
 * The rewritten file consists of every line up to and including the
 * "Contents" heading, a blank line, the new ToC, a blank line, and then the
 * original lines from the next "## " heading onward. Only those trailing
 * lines receive the copyright/link replacements; the old ToC body is dropped.
 * The result is written back to `f`, joined with "\n".
 *
 * @param {String} f path of the Markdown file to rewrite in place
 * @returns {Promise<{name: string, lines: number, toclines: number}>} base name
 *   of the file, number of chunks written (the whole ToC counts as one), and
 *   number of ToC entries that were kept
 * @throws {Error} if the "Contents" heading, or the "## " heading that ends the
 *   old ToC, cannot be found; nothing is written in that case
 */
async function processFile(f) {
    console.log('Reading: ' + f);
    const contents = await fs.readFile(f, 'utf-8');
    const lines = contents.split(/\r?\n/);

    // Locate the heading that introduces the old ToC.
    const tocStart = lines.findIndex((line) => contentsStart.test(line));
    if (tocStart === -1) {
        throw new Error(`in ${f}: ran out of lines looking for start of ToC`);
    }

    // Generate the ToC from the whole document.
    let toc = generateGfmToc(contents, gfmOpts);

    // Drop unwanted leading entries: each pattern is tried once, in order,
    // against whatever is currently the first entry.
    for (const pat of tocDelete) {
        if (pat.test(toc[0])) {
            toc = toc.slice(1);
        }
    }

    // The old ToC ends at the next "## " heading after the Contents heading.
    const tocEnd = lines.findIndex((line, n) => n > tocStart && contentsEnd.test(line));
    if (tocEnd === -1) {
        throw new Error(`in ${f}: ran out of lines looking for end of ToC`);
    }

    // "Copyright (c) YYYY<dash>YYYY Unicode": the dash may be an ASCII hyphen,
    // an en dash or an em dash. The hyphen is listed first in the class so it
    // is a literal rather than a range operator.
    const oldcopyright = /(Copyright\s*[©]?)\s*([0-9]{4})[-–—][0-9]{4}\s*Unicode/;
    const newyear = new Date().getFullYear(); // 2023, etc
    // "(tr35-xyz.html", optionally with an absolute unicode.org prefix.
    const badlink = /(\()(http[s]?:\/\/[w\.]*unicode\.org\/reports\/tr35\/)?tr35([^\./]*)\.html/g;
    // "(https://unicode.org/reports/tr35/#anchor" style links.
    const badlink2 = /(\()(http[s]?:\/\/[w\.]*unicode\.org\/reports\/tr35)[\/]?#/g;
    // A link whose whole target is the absolute TR35 URL.
    const badlink3 = /\(http(?:s)?:\/\/(?:www\.)?unicode\.org\/reports\/tr35(?:\/)?\)/g;

    // Keep the start year, end the range at the current year (with an en dash),
    // and point TR35 links at the sibling .md files.
    const fixLine = (line) => line
        .replace(oldcopyright, `$1 $2–${newyear} Unicode`)
        .replace(badlink, '$1tr35$3.md')
        .replace(badlink2, '$1tr35.md#')
        .replace(badlink3, '(tr35.md)');

    const out = [
        ...lines.slice(0, tocStart + 1), // everything through the Contents heading
        '', // blank line before ToC
        toc.join('\n'),
        '',
        ...lines.slice(tocEnd).map((line) => fixLine(line)),
    ];

    // Write the whole file to disk.
    await fs.writeFile(f, out.join('\n'), 'utf-8');

    return {
        name: path.basename(f),
        lines: out.length,
        toclines: toc.length
    };
}

// Process everything: rewrite every matching file (all files are started
// together), then print one summary object per file. On any failure the error
// is printed and the process exit code is set to 1 so callers can detect it.

getSrcFiles()
    .then((files) => Promise.all(files.map((file) => processFile(file))))
    .then((summaries) => console.dir(summaries))
    .catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });