'use strict';

const fs = require("fs").promises;
const { marked } = require("marked");
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const path = require("path");
const markedAlert = require('marked-alert');

// Setup some options for our markdown renderer
// NOTE(review): several of these options (highlight, sanitize, smartLists,
// smartypants, xhtml) are deprecated or removed in newer marked releases —
// confirm against the marked version this project pins.
marked.setOptions({
  renderer: new marked.Renderer(),

  // Add a code highlighter
  highlight: function (code, forlanguage) {
    // Required lazily, so highlight.js is only loaded once a code block is highlighted.
    const hljs = require("highlight.js");
    // Fall back to plain text for languages highlight.js does not know.
    const language = hljs.getLanguage(forlanguage) ? forlanguage : "plaintext";
    return hljs.highlight(code, { language }).value;
  },
  pedantic: false,
  gfm: true,
  breaks: false,
  sanitize: false,
  smartLists: true,
  smartypants: false,
  xhtml: false,
});

marked.use(markedAlert());

/**
 * Read the input .md file, and write to a corresponding .html file
 * next to it (same directory, same base name).
 *
 * The rendered HTML is post-processed in a JSDOM: stylesheets and a <title>
 * are added to the head, the content is wrapped in <div class="body">, the
 * contents of 'header.html' (resolved against the current working directory)
 * are inserted, .md links are rewritten to .html, "Table: " h6 headings become
 * table captions, and explicit <a name> anchors take over the parent's id.
 *
 * @param {string} infile path to input file
 * @returns {Promise<string>} name of output file (for status update)
 * @throws {Error} if the document has neither a <title> nor an <h1>
 */
async function renderit(infile) {
  console.log(`Reading ${infile}`);
  const basename = path.basename(infile, ".md");
  const outfile = path.join(path.dirname(infile), `${basename}.html`);
  let f1 = await fs.readFile(infile, "utf-8");

  // oh the irony of removing a BOM before posting to unicode.org
  // After UTF-8 decoding the BOM is the single code unit U+FEFF (not three
  // bytes), so exactly one character must be dropped.
  if (f1.charCodeAt(0) === 0xfeff) {
    f1 = f1.substring(1);
  }

  // render to HTML
  const rawHtml = marked(f1);

  // now fix. Spin up a JSDOM so we can manipulate
  const dom = new JSDOM(rawHtml);
  const document = dom.window.document;

  // First the HEAD
  const head = document.getElementsByTagName("head")[0];

  // add CSS to HEAD
  head.innerHTML =
    head.innerHTML +
    `<meta charset="utf-8">\n` +
    `<link rel='stylesheet' type='text/css' media='screen' href='../reports-v2.css'>\n` +
    `<link rel='stylesheet' type='text/css' media='screen' href='tr35.css'>\n`;

  // Assume there's not already a title and that we need to add one.
  if (document.getElementsByTagName("title").length >= 1) {
    console.log("Already had a <title>… not changing.");
  } else {
    const firstH1 = document.getElementsByTagName("h1")[0];
    if (!firstH1) {
      // Fail with a message that names the file instead of an opaque TypeError.
      throw new Error(`${infile}: no <h1> found to derive the <title> from`);
    }
    const title = document.createElement("title");
    const first_h1_text = firstH1.textContent.replace(')Part', ') Part');
    title.appendChild(document.createTextNode(first_h1_text));
    head.appendChild(title);
  }

  // calculate the header object
  const header = document.createElement("div");
  header.setAttribute("class", "header");

  // taken from prior TRs, read from the header in 'header.html'
  header.innerHTML = (await fs.readFile('header.html', 'utf-8')).trim();

  // Move all elements out of the top level body and into a subelement
  // The subelement is <div class="body"/>
  const body = document.getElementsByTagName("body")[0];
  const div = document.createElement("div");
  div.setAttribute("class", "body");
  let sawFirstTable = false;
  // Iterate over a snapshot: body.childNodes is a live list, and removing
  // nodes while iterating it directly skips every other child.
  for (const e of Array.from(body.childNodes)) {
    body.removeChild(e);
    if (div.childNodes.length === 0 && e.tagName === 'P') {
      // update title element to <h2 class="uaxtitle"/>
      const newTitle = document.createElement('h2');
      newTitle.setAttribute("class", "uaxtitle");
      newTitle.appendChild(document.createTextNode(e.textContent));
      div.appendChild(newTitle);
    } else {
      if (!sawFirstTable && e.tagName === 'TABLE') {
        // Update first table to simple width=90%
        // The first table is the document header (Author, etc.)
        e.setAttribute("class", "simple");
        e.setAttribute("width", "90%");
        sawFirstTable = true;
      }
      div.appendChild(e);
    }
  }

  /**
   * create a <SCRIPT/> object.
   * Choose ONE of src or code.
   * @param {Object} obj
   * @param {string} [obj.src] source of script as url
   * @param {string} [obj.code] code for script as text
   * @returns {HTMLScriptElement} the new, not yet attached, script element
   */
  function getScript({ src, code }) {
    const script = document.createElement("script");
    if (src) {
      script.setAttribute("src", src);
    }
    if (code) {
      script.appendChild(document.createTextNode(code));
    }
    return script;
  }

  // body already has no content to it at this point.
  // Add all the pieces back.
  body.appendChild(getScript({ src: './js/anchor.min.js' }));
  body.appendChild(header);
  body.appendChild(div);

  // now, fix all links from ….md#… to ….html#…
  for (const e of document.getElementsByTagName("a")) {
    const href = e.getAttribute("href");
    let m;
    if ((m = /^(.*)\.md#(.*)$/.exec(href))) {
      e.setAttribute("href", `${m[1]}.html#${m[2]}`);
    } else if ((m = /^(.*)\.md$/.exec(href))) {
      e.setAttribute("href", `${m[1]}.html`);
    }
  }

  // put this last
  body.appendChild(getScript({
    // This invokes anchor.js
    code: `anchors.add('h1, h2, h3, h4, h5, h6, caption, dfn');`
  }));

  // Now, fixup captions
  // Look for: <h6>Table: …</h6> followed by <table>…</table>
  // Move the h6 inside the table, but as <caption/>
  const h6es = document.getElementsByTagName("h6");
  // Removal is deferred so the live h6 collection is not mutated mid-iteration.
  const toRemove = [];
  for (const h6 of h6es) {
    if (!h6.innerHTML.startsWith("Table: ")) {
      console.error('Does not start with Table: ' + h6.innerHTML);
      continue; // no 'Table:' marker.
    }
    const next = h6.nextElementSibling;
    // next is null when the h6 is the last element of its parent.
    if (!next || next.tagName !== 'TABLE') {
      console.error('Not a following table for ' + h6.innerHTML);
      continue; // Next item is not a table. Maybe a PRE or something.
    }
    const caption = document.createElement("caption");
    for (const e of h6.childNodes) {
      caption.appendChild(e.cloneNode(true));
    }
    // Snapshot the attributes: h6.attributes is live, and removing entries
    // while iterating it directly would skip every other attribute.
    for (const p of Array.from(h6.attributes)) {
      caption.setAttribute(p.name, p.value);
      h6.removeAttribute(p.name); // so that it does not have a conflicting id
    }
    next.prepend(caption);
    toRemove.push(h6);
  }
  for (const h6 of toRemove) {
    h6.remove();
  }

  // Drop generated anchors where there is an explicit anchor
  const anchors = document.getElementsByTagName("a");
  for (const a of anchors) {
    // a needs to have a name
    const aname = a.getAttribute('name');
    if (!aname) continue;
    // parent needs to have a single child element and its own 'id'.
    const parent = a.parentElement;
    if (parent.childElementCount !== 1) continue;
    const parid = parent.getAttribute('id');
    if (!parid) continue;
    // Criteria met. swap the name and id
    parent.setAttribute('id', aname);
    a.setAttribute('name', parid);
  }

  // OK, done munging the DOM, write it out.
  console.log(`Writing ${outfile}`);

  // TODO: we assume that DOCTYPE is not written.
  await fs.writeFile(outfile, `<!DOCTYPE html>\n`
    + dom.serialize());
  return outfile;
}

/**
 * Convert all files: every *.md file directly inside ./dist is rendered
 * concurrently; the returned promise rejects as soon as any one file fails.
 * @returns {Promise<string[]>} list of output files
 */
async function fixall() {
  const outbox = "./dist";

  // TODO: move source file copy into JavaScript?
  // srcbox = '../../../docs/ldml';

  const fileList = (await fs.readdir(outbox))
    .filter((f) => /\.md$/.test(f))
    .map((f) => path.join(outbox, f));
  return Promise.all(fileList.map(renderit));
}

// Run the conversion; on failure log the error and signal it via the exit code.
fixall().then(
  (x) => console.dir(x),
  (e) => {
    console.error(e);
    process.exitCode = 1;
  }
);