"use strict";

const fs = require("fs").promises;
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const path = require("path");

/**
 * Run this after outputting html into 'dist'
 * It will update ../../../docs/ldml/*.anchors.json
 * Use source control to see if the links have changed.
 */

// We would ideally run marked and process the output here.
// But that might introduce duplicate code.
const DONE_ICON = "✅";
const GEAR_ICON = "⚙️";
const NONE_ICON = "∅";
// NOTE(review): the four icons below are empty strings in this copy of the
// file; presumably they were emoji that got lost in transit. Confirm against
// the upstream source before changing them.
const PACKAGE_ICON = "";
const SECTION_ICON = "";
const TYPE_ICON = "";
const WARN_ICON = "⚠️";
const POINT_ICON = "";
const MISSING_ICON = "❌";

/** Directory (relative to the working directory) holding the generated HTML. */
const HTML_DIR = "./dist";
/** Directory (relative to the working directory) receiving *.anchors.json. */
const ANCHORS_DIR = "../../../docs/ldml";

/** Link that starts with '#' => group 1 = anchor on the same page. */
const INTRA_PAGE_LINK = /^#(.*)$/;
/** Link to another spec part => group 1 = basename, group 2 = optional anchor. */
const TR_SECTION_LINK = /^(tr35(?:[^.]*))\.html(?:#(.*))?$/;
/** Link with a recognized external scheme. */
const EXTERNAL_LINK = /^(http|https|mailto|ftp):.*$/;

/**
 * Build the canonical "<section>.md[#anchor]" form used to compare links
 * against anchors.
 *
 * @param {string} targetSection e.g. 'tr35-info'
 * @param {string} [anchor] e.g. 'Parts'; when empty/omitted only the page is returned
 * @returns {string} e.g. 'tr35-info.md#Parts'
 */
function constructLink(targetSection, anchor) {
  const page = `${targetSection}.md`;
  if (!anchor) {
    return page;
  }
  return `${page}#${anchor}`;
}

/**
 * Read one generated .html file, collect every anchor it defines and every
 * link target it references, and write the sorted anchor list to
 * `<ANCHORS_DIR>/<basename>.anchors.json`.
 *
 * @param {string} infile path to the input .html file
 * @returns {Promise<[string, string[], string[]]>} tuple of
 *   [basename, sorted anchor names, sorted link targets in constructLink() form]
 */
async function extractAnchors(infile) {
  const basename = path.basename(infile, ".html");
  console.log(`${SECTION_ICON} Reading ${infile}`);
  let rawHtml = await fs.readFile(infile, "utf-8");

  // The file is already decoded, so a BOM is exactly one code unit (U+FEFF).
  // Dropping more than one character would eat the start of the document.
  if (rawHtml.charCodeAt(0) === 0xfeff) {
    rawHtml = rawHtml.slice(1);
  }

  // Spin up a JSDOM so we can walk the document.
  const dom = new JSDOM(rawHtml);
  const document = dom.window.document;

  const anchors = new Set();
  const targets = new Set();

  /** Record an anchor name, warning when the page defines it more than once. */
  function addAnchor(n) {
    if (!n) return;
    if (anchors.has(n)) {
      console.error(`${WARN_ICON} ${constructLink(basename)}: Duplicate anchor: #${n}`);
    } else {
      anchors.add(n);
    }
  }

  /** Classify an href and record it when it points inside the spec. */
  function addTarget(href) {
    const intraPage = INTRA_PAGE_LINK.exec(href);
    if (intraPage) {
      // same page
      targets.add(constructLink(basename, intraPage[1]));
      return;
    }
    const trSection = TR_SECTION_LINK.exec(href);
    if (trSection) {
      // another page
      targets.add(constructLink(trSection[1], trSection[2]));
      return;
    }
    if (EXTERNAL_LINK.test(href)) {
      // external: do nothing
      // TODO: add to list of external links?
      return;
    }
    // Error on all other links
    console.error(`${WARN_ICON} ${basename}: Unknown anchor: ${href}`);
  }

  // extract anchors: every id, plus the legacy name attribute on <a>
  for (const el of document.getElementsByTagName("*")) {
    const id = el.getAttribute("id");
    addAnchor(id);

    if (el.tagName === "A") {
      const name = el.getAttribute("name");
      // An <a> carrying the same value as both id and name is one anchor,
      // not a duplicate, so only register the name when it differs.
      if (name !== id) {
        addAnchor(name);
      }
    }
  }

  // extract targets
  for (const a of document.getElementsByTagName("A")) {
    const href = a.getAttribute("href");
    if (href) {
      addTarget(href);
    }
  }

  // Locale-neutral collation keeps the output order stable across machines.
  const coll = new Intl.Collator(["und"]);
  const anchorList = Array.from(anchors).sort(coll.compare);
  const anchorFile = path.join(ANCHORS_DIR, `${basename}.anchors.json`);
  await fs.writeFile(anchorFile, JSON.stringify(anchorList, null, " "));
  const targetList = Array.from(targets).sort(coll.compare);
  return [basename, anchorList, targetList];
}

/**
 * Run extractAnchors() over every .html file in HTML_DIR.
 *
 * @returns {Promise<Array<[string, string[], string[]]>>} one
 *   [basename, anchors, targets] tuple per file
 */
async function extractAll() {
  const fileList = (await fs.readdir(HTML_DIR))
    .filter((f) => /\.html$/.test(f))
    .map((f) => path.join(HTML_DIR, f));
  return Promise.all(fileList.map(extractAnchors));
}

/**
 * Extract anchors from all pages, then verify that every internal link
 * resolves to a known page or anchor. Broken links are reported on stderr
 * and set process.exitCode to 1.
 *
 * @returns {Promise<string[]>} the section basenames that were processed
 */
async function checkAll() {
  console.log(`${GEAR_ICON} Reading HTML`);
  const checked = await extractAll();
  console.log(`${GEAR_ICON} Collecting internal links`);

  const allInternalTargets = new Set();
  const allInternalAnchors = new Set();
  // e.g. "tr35-info" => Set(["tr35-keyboards.md#Element_keyboard", ...])
  const sectionToTargets = new Map();

  for (const [sourceSection, anchorList, targetList] of checked) {
    // The bare page is itself a valid target, e.g. 'tr35-collation.md'
    allInternalAnchors.add(constructLink(sourceSection));
    for (const target of targetList) {
      allInternalTargets.add(target);
    }
    sectionToTargets.set(sourceSection, new Set(targetList)); // for error checking
    for (const anchor of anchorList) {
      allInternalAnchors.add(constructLink(sourceSection, anchor)); // tr35-collation.md#Parts
    }
  }

  console.log(`${GEAR_ICON} Checking ${allInternalTargets.size} internal links against ${allInternalAnchors.size} anchors`);

  const missingInternalLinks = new Set();
  for (const expectedAnchor of allInternalTargets) {
    if (!allInternalAnchors.has(expectedAnchor)) {
      missingInternalLinks.add(expectedAnchor);
    }
  }

  if (missingInternalLinks.size > 0) {
    for (const expectedAnchor of missingInternalLinks) {
      // Coalesce: list every section that links to this missing anchor.
      const sourceSections =
        [...sectionToTargets]
          .filter(([, sectionTargets]) => sectionTargets.has(expectedAnchor))
          .map(([section]) => constructLink(section))
          .join(" & ") || "(unknown section(s))";
      console.error(`${MISSING_ICON} Broken internal link: ${sourceSections}: (${expectedAnchor})`);
    }
    console.error(`${WARN_ICON} ${missingInternalLinks.size} missing links.`);
    process.exitCode = 1;
  }

  console.log(`${POINT_ICON} use: 'lychee --cache docs/ldml' to check external links`);

  return checked.map(([sectionName]) => sectionName);
}

checkAll()
  .then((sections) => {
    for (const section of sections) {
      console.log(`${DONE_ICON} ${constructLink(section)}`);
    }
  })
  .catch((e) => {
    // Also catches a failure inside the success handler above.
    console.error(e);
    process.exitCode = 1;
  });