xref: /aosp_15_r20/external/cldr/tools/scripts/tr-archive/archive.js (revision 912701f9769bb47905792267661f0baf2b85bed5)
1const fs = require("fs").promises;
2const { marked } = require("marked");
3const jsdom = require("jsdom");
4const { JSDOM } = jsdom;
5const path = require("path");
6const markedAlert = require('marked-alert');
7
// Configure the shared `marked` instance used by every conversion in this
// script: GitHub-flavoured Markdown, no sanitizing, and syntax highlighting
// for fenced code blocks.
// NOTE(review): which of these option names are honoured depends on the
// installed `marked` release — confirm against the pinned version.
marked.setOptions({
  renderer: new marked.Renderer(),

  // Code highlighter: returns the highlighted HTML for one code block.
  highlight: function (code, forlanguage) {
    // Loaded on first use; Node caches the module for later calls.
    const hljs = require("highlight.js");
    // Fall back to plain text when highlight.js does not know the language.
    // Declared with `const` so it does not leak into the global scope.
    const language = hljs.getLanguage(forlanguage) ? forlanguage : "plaintext";
    return hljs.highlight(code, { language }).value;
  },
  pedantic: false,
  gfm: true,
  breaks: false,
  sanitize: false,
  smartLists: true,
  smartypants: false,
  xhtml: false,
});

// Register the marked-alert extension on the same instance.
marked.use(markedAlert());
28
/**
 * Remove a leading byte-order mark from already-decoded text.
 * @param {string} text decoded file contents
 * @returns {string} the text without a leading U+FEFF
 */
function stripBom(text) {
  // Once decoded, the BOM is the single code unit U+FEFF (the three-byte
  // form only exists in the raw UTF-8 bytes), so exactly one unit is dropped.
  return text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
}

/**
 * Map a link target ending in ".md" (optionally followed by "#fragment")
 * to the corresponding ".html" target.
 * @param {string|null} href value of an href attribute
 * @returns {string|null} the rewritten target, or null when nothing changes
 */
function mdHrefToHtml(href) {
  let m;
  if ((m = /^(.*)\.md#(.*)$/.exec(href))) {
    return `${m[1]}.html#${m[2]}`;
  }
  if ((m = /^(.*)\.md$/.exec(href))) {
    return `${m[1]}.html`;
  }
  return null;
}

/**
 * Create a <script> element. Choose ONE of src or code.
 * @param {Document} document owner document
 * @param {Object} obj
 * @param {string} [obj.src] source of script as url
 * @param {string} [obj.code] code for script as text
 * @returns {Element} the new, unattached script element
 */
function createScript(document, { src, code }) {
  const script = document.createElement("script");
  if (src) {
    script.setAttribute("src", src);
  }
  if (code) {
    script.appendChild(document.createTextNode(code));
  }
  return script;
}

/**
 * Add the charset, the stylesheets and (if absent) a <title> to <head>.
 * @param {Document} document document to modify
 * @param {string} infile input path, used only in error messages
 * @throws {Error} if a title is needed but the document has no <h1>
 */
function addHeadContent(document, infile) {
  const head = document.getElementsByTagName("head")[0];

  head.innerHTML =
    head.innerHTML +
    `<meta charset="utf-8">\n` +
    `<link rel='stylesheet' type='text/css' media='screen' href='../reports-v2.css'>\n` +
    `<link rel='stylesheet' type='text/css' media='screen' href='tr35.css'>\n`;

  if (document.getElementsByTagName("title").length >= 1) {
    console.log("Already had a <title>… not changing.");
    return;
  }

  const firstH1 = document.getElementsByTagName("h1")[0];
  if (!firstH1) {
    // Fail with a message that names the file instead of an opaque TypeError.
    throw new Error(`${infile}: no <h1> found to derive the <title> from`);
  }
  // Insert a space where the heading text runs ")" directly into "Part".
  const titleText = firstH1.textContent.replace(')Part', ') Part');
  const title = document.createElement("title");
  title.appendChild(document.createTextNode(titleText));
  head.appendChild(title);
}

/**
 * Move every child of <body> into a new <div class="body">, then rebuild
 * <body> as: anchor.js script, header, content div.
 * @param {Document} document document to modify
 * @param {Element} header prepared <div class="header">
 * @returns {Element} the <body> element
 */
function wrapBodyContent(document, header) {
  const body = document.getElementsByTagName("body")[0];
  const content = document.createElement("div");
  content.setAttribute("class", "body");
  let sawFirstTable = false;

  // childNodes is a live list: iterate over a snapshot so that removing a
  // node cannot make the loop skip its neighbour. Whitespace text nodes
  // between elements are moved along with everything else.
  for (const node of Array.from(body.childNodes)) {
    body.removeChild(node);
    if (content.childNodes.length === 0 && node.tagName === 'P') {
      // A leading paragraph is replaced by <h2 class="uaxtitle"/>
      const newTitle = document.createElement('h2');
      newTitle.setAttribute("class", "uaxtitle");
      newTitle.appendChild(document.createTextNode(node.textContent));
      content.appendChild(newTitle);
      continue;
    }
    if (!sawFirstTable && node.tagName === 'TABLE') {
      // The first table is the document header (Author, etc.):
      // update it to simple width=90%
      node.setAttribute("class", "simple");
      node.setAttribute("width", "90%");
      sawFirstTable = true;
    }
    content.appendChild(node);
  }

  // body is empty at this point. Add all the pieces back.
  body.appendChild(createScript(document, { src: './js/anchor.min.js' }));
  body.appendChild(header);
  body.appendChild(content);
  return body;
}

/**
 * Fix all links from  ….md#…  to ….html#…  (and ….md to ….html).
 * @param {Document} document document to modify
 */
function rewriteMarkdownLinks(document) {
  for (const a of document.getElementsByTagName("a")) {
    const target = mdHrefToHtml(a.getAttribute("href"));
    if (target !== null) {
      a.setAttribute("href", target);
    }
  }
}

/**
 * Turn each <h6>Table: …</h6> that is directly followed by a <table> into a
 * <caption/> inside that table, carrying over the h6's content and attributes.
 * @param {Document} document document to modify
 */
function convertTableCaptions(document) {
  const toRemove = [];
  for (const h6 of document.getElementsByTagName("h6")) {
    if (!h6.innerHTML.startsWith("Table: ")) {
      console.error('Does not start with Table: ' + h6.innerHTML);
      continue; // no 'Table:' marker.
    }
    const next = h6.nextElementSibling;
    // `next` is null when the h6 is the last element of its parent.
    if (!next || next.tagName !== 'TABLE') {
      console.error('Not a following table for ' + h6.innerHTML);
      continue; // Next item is not a table. Maybe a PRE or something.
    }
    const caption = document.createElement("caption");
    for (const child of h6.childNodes) {
      caption.appendChild(child.cloneNode(true));
    }
    // attributes is live as well: snapshot it so that removing one
    // attribute does not cause the next one to be skipped.
    for (const attr of Array.from(h6.attributes)) {
      caption.setAttribute(attr.name, attr.value);
      h6.removeAttribute(attr.name); // so that it does not have a conflicting id
    }
    next.prepend(caption);
    toRemove.push(h6);
  }
  // Removal is deferred so the live h6 collection is stable while iterating.
  for (const h6 of toRemove) {
    h6.remove();
  }
}

/**
 * Drop generated anchors where there is an explicit anchor: when an
 * <a name="…"> is the only child element of a parent that has its own id,
 * swap the two values so the parent carries the explicit name as its id.
 * @param {Document} document document to modify
 */
function preferExplicitAnchors(document) {
  for (const a of document.getElementsByTagName("a")) {
    const aname = a.getAttribute('name');
    if (!aname) continue;
    const parent = a.parentElement;
    if (parent.childElementCount !== 1) continue;
    const parid = parent.getAttribute('id');
    if (!parid) continue;
    parent.setAttribute('id', aname);
    a.setAttribute('name', parid);
  }
}

/**
 * Read the input .md file, and write to a corresponding .html file
 * next to it. The page header is read from 'header.html', resolved
 * against the current working directory.
 * @param {string} infile path to input file
 * @returns {Promise<string>} name of output file (for status update)
 * @throws {Error} if the document has neither a <title> nor an <h1>
 */
async function renderit(infile) {
  console.log(`Reading ${infile}`);
  // Declared locally: several conversions run concurrently and must not
  // share state through implicit globals.
  const basename = path.basename(infile, ".md");
  const outfile = path.join(path.dirname(infile), `${basename}.html`);

  // oh the irony of removing a BOM before posting to unicode.org
  const markdown = stripBom(await fs.readFile(infile, "utf-8"));

  // render to HTML
  const rawHtml = marked(markdown);

  // now fix. Spin up a JSDOM so we can manipulate
  const dom = new JSDOM(rawHtml);
  const document = dom.window.document;

  // First the HEAD
  addHeadContent(document, infile);

  // taken from prior TRs, read from the header in 'header.html'
  const header = document.createElement("div");
  header.setAttribute("class", "header");
  header.innerHTML = (await fs.readFile('header.html', 'utf-8')).trim();

  const body = wrapBodyContent(document, header);
  rewriteMarkdownLinks(document);

  // put this last in <body>
  body.appendChild(createScript(document, {
    // This invokes anchor.js
    code: `anchors.add('h1, h2, h3, h4, h5, h6, caption, dfn');`
  }));

  convertTableCaptions(document);
  preferExplicitAnchors(document);

  // OK, done munging the DOM, write it out.
  console.log(`Writing ${outfile}`);

  // TODO: we assume that DOCTYPE is not written.
  await fs.writeFile(outfile, `<!DOCTYPE html>\n` + dom.serialize());
  return outfile;
}
203
/**
 * Convert every Markdown file found directly inside ./dist (resolved
 * against the current working directory) to HTML. Files are rendered
 * concurrently; if any conversion fails the returned promise rejects.
 * @returns {Promise<string[]>} list of output files, in listing order
 */
async function fixall() {
  // Declared with `const` so it does not become an implicit global.
  const outbox = "./dist";

  // TODO: move source file copy into JavaScript?
  // srcbox = '../../../docs/ldml';

  const entries = await fs.readdir(outbox);
  const fileList = entries
    .filter((f) => /\.md$/.test(f))
    .map((f) => path.join(outbox, f));
  // Call renderit with the path only; Array.prototype.map would otherwise
  // also pass the index and the array as extra arguments.
  return Promise.all(fileList.map((file) => renderit(file)));
}
219
// Script entry point: convert everything, then either print the list of
// generated files or report the failure and mark the process as failed.
const reportOutputs = (outputFiles) => console.dir(outputFiles);

const reportFailure = (err) => {
  console.error(err);
  // Set the exit code rather than exiting, so pending output is flushed.
  process.exitCode = 1;
};

fixall().then(reportOutputs, reportFailure);
227