const fs = require("fs").promises;
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const path = require("path");

/**
 * Run this after outputting html into 'dist'
 * It will update ../../../docs/ldml/*.anchors.json
 * Use source control to see if the links have changed.
 */

// We would ideally run marked and process the output here.
// But that might introduce duplicate code.

const DONE_ICON = "✅";
const GEAR_ICON = "⚙️";
const NONE_ICON = "∅";
const PACKAGE_ICON = "📦";
const SECTION_ICON = "📍";
const TYPE_ICON = "📂";
const WARN_ICON = "⚠️";
const POINT_ICON = "👉";
const MISSING_ICON = "❌";

/** Directory scanned for generated .html files (relative to the working directory). */
const HTML_DIR = "./dist";
/** Directory that receives the *.anchors.json files (relative to the working directory). */
const ANCHOR_DIR = "../../../docs/ldml";

// Link classifiers. None of them use the /g or /y flag, so sharing them
// across calls is safe (no lastIndex state is carried over).
const INTRA_PAGE_LINK = /^#(.*)$/; // starts with # => 1=anchor
const TR_SECTION_LINK = /^(tr35[^.]*)\.html(?:#(.*))?$/; // => 1=basename, 2=anchor
const EXTERNAL_LINK = /^(http|https|mailto|ftp):.*$/; // scheme

/**
 * Build the canonical "page.md#anchor" form used to compare links and anchors.
 *
 * @param {string} targetSection e.g. 'tr35-info'
 * @param {string} [anchor] e.g. 'Parts'; when empty or omitted only the page is returned
 * @returns {string} e.g. 'tr35-info.md#Parts', or 'tr35-info.md' without an anchor
 */
function constructLink(targetSection, anchor) {
  const page = `${targetSection}.md`;
  if (!anchor) {
    return page;
  }
  return `${page}#${anchor}`;
}

/**
 * Read one generated .html file, collect the anchors it defines and the
 * internal links it contains, and write the sorted anchor list to
 * `<basename>.anchors.json` in ANCHOR_DIR.
 *
 * Duplicate anchors and unclassifiable hrefs are reported on stderr but do
 * not abort processing.
 *
 * @param {string} infile path to the input .html file
 * @returns {Promise<[string, string[], string[]]>} a triple of
 *   [basename, sorted anchors defined in the page, sorted internal link targets]
 */
async function extractAnchors(infile) {
  const basename = path.basename(infile, ".html");
  console.log(`${SECTION_ICON} Reading ${infile}`);

  let rawHtml = await fs.readFile(infile, "utf-8");
  // oh the irony of removing a BOM before posting to unicode.org
  // The text is already decoded, so the BOM is a single code unit (U+FEFF),
  // not the three bytes it occupies in UTF-8: strip exactly one character.
  if (rawHtml.charCodeAt(0) === 0xfeff) {
    rawHtml = rawHtml.slice(1);
  }

  // Spin up a JSDOM so we can walk the parsed document.
  const dom = new JSDOM(rawHtml);
  const { document } = dom.window;

  const anchors = new Set();
  const targets = new Set();

  /** Record an anchor name, warning when the page defines it more than once. */
  function addAnchor(n) {
    if (!n) return;
    if (anchors.has(n)) {
      console.error(`${WARN_ICON} ${constructLink(basename)}: Duplicate anchor: #${n}`);
    } else {
      anchors.add(n);
    }
  }

  /** Classify an href and record it when it points inside the spec. */
  function addTarget(href) {
    const intraPage = INTRA_PAGE_LINK.exec(href);
    if (intraPage) {
      // same page
      targets.add(constructLink(basename, intraPage[1]));
      return;
    }
    const trSection = TR_SECTION_LINK.exec(href);
    if (trSection) {
      // another page
      targets.add(constructLink(trSection[1], trSection[2]));
      return;
    }
    if (EXTERNAL_LINK.test(href)) {
      // external: do nothing
      // TODO: add to list of external links?
      return;
    }
    // Error on all other links
    console.error(`${WARN_ICON} ${basename}: Unknown anchor: ${href}`);
  }

  // extract anchors: every element's id, plus the legacy name= on <a>
  for (const el of document.getElementsByTagName("*")) {
    addAnchor(el.getAttribute("id"));
    if (el.tagName === "A") {
      addAnchor(el.getAttribute("name"));
    }
  }

  // extract targets
  for (const a of document.getElementsByTagName("A")) {
    const href = a.getAttribute("href");
    if (href) {
      addTarget(href);
    }
  }

  // Locale-independent ordering keeps the JSON output stable across machines.
  const coll = new Intl.Collator(["und"]);
  const anchorList = [...anchors].sort(coll.compare);
  const anchorFile = path.join(ANCHOR_DIR, `${basename}.anchors.json`);
  await fs.writeFile(anchorFile, JSON.stringify(anchorList, null, ' '));

  const targetList = [...targets].sort(coll.compare);
  return [basename, anchorList, targetList];
}

/**
 * Process every .html file found in HTML_DIR, in parallel.
 *
 * @returns {Promise<Array<[string, string[], string[]]>>} one
 *   [basename, anchors, targets] triple per file
 */
async function extractAll() {
  const fileList = (await fs.readdir(HTML_DIR))
    .filter((f) => /\.html$/.test(f))
    .map((f) => path.join(HTML_DIR, f));
  return Promise.all(fileList.map(extractAnchors));
}

/**
 * Extract anchors and links from all pages, then verify that every internal
 * link resolves to a known page or anchor. Broken links are reported on
 * stderr and set `process.exitCode` to 1.
 *
 * @returns {Promise<string[]>} the basenames of all processed pages
 */
async function checkAll() {
  console.log(`${GEAR_ICON} Reading HTML`);
  const checked = await extractAll();

  console.log(`${GEAR_ICON} Collecting internal links`);
  const allInternalTargets = new Set();
  const allInternalAnchors = new Set();
  // e.g. "tr35-info" => Set(["tr35-keyboards.md#Element_keyboard", …])
  // A Map (not a plain object) so that keys derived from file names can
  // never collide with Object.prototype members.
  const sectionToTargets = new Map();

  for (const [sourceSection, anchorList, targetList] of checked) {
    // The page itself is a valid target, example: 'tr35-collation.md'
    allInternalAnchors.add(constructLink(sourceSection));
    for (const target of targetList) {
      allInternalTargets.add(target);
    }
    sectionToTargets.set(sourceSection, new Set(targetList)); // for error checking
    for (const anchor of anchorList) {
      allInternalAnchors.add(constructLink(sourceSection, anchor)); // tr35-collation.md#Parts
    }
  }

  console.log(`${GEAR_ICON} Checking ${allInternalTargets.size} internal links against ${allInternalAnchors.size} anchors`);
  const missingInternalLinks = new Set();
  for (const expectedAnchor of allInternalTargets) {
    if (!allInternalAnchors.has(expectedAnchor)) {
      missingInternalLinks.add(expectedAnchor);
    }
  }

  if (missingInternalLinks.size > 0) {
    for (const expectedAnchor of missingInternalLinks) {
      // coalesce: name every page that links to the missing anchor
      const sourceSections =
        [...sectionToTargets]
          .filter(([, linked]) => linked.has(expectedAnchor)) // Does this section target this anchor?
          .map(([section]) => constructLink(section)) // drop the set
          .join(' & ') || // join section name(s)
        '(unknown section(s))';
      // error
      console.error(`${MISSING_ICON} Broken internal link: ${sourceSections}: (${expectedAnchor})`);
    }
    console.error(`${WARN_ICON} ${missingInternalLinks.size} missing links.`);
    process.exitCode = 1;
  }

  console.log(`${POINT_ICON} use: 'lychee --cache docs/ldml' to check external links`);
  return checked.map(([section]) => section);
}

checkAll()
  .then((sections) => {
    for (const section of sections) {
      console.log(`${DONE_ICON} ${constructLink(section)}`);
    }
  })
  .catch((e) => {
    console.error(e);
    process.exitCode = 1;
  });