"use strict";

const fs = require("fs").promises;
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const path = require("path");

/**
 * Run this after outputting html into 'dist'
 * It will update ../../../docs/ldml/*.anchors.json
 * Use source control to see if the links have changed.
 *
 * Both paths are relative, so they are resolved against the current
 * working directory of the process.
 */

// We would ideally run marked and process the output here.
// But that might introduce duplicate code.

// NOTE(review): several icons below are empty strings in this copy of the
// file; they may have been lost in transit — confirm against the upstream source.
const DONE_ICON = "✅";
const GEAR_ICON = "⚙️";
const NONE_ICON = "∅";
const PACKAGE_ICON = "";
const SECTION_ICON = "";
const TYPE_ICON = "";
const WARN_ICON = "⚠️";
const POINT_ICON = "";
const MISSING_ICON = "❌";

/** Directory that receives the generated `<section>.anchors.json` files. */
const ANCHOR_DIR = "../../../docs/ldml";
/** Directory that is scanned for the rendered `.html` files. */
const HTML_DIR = "./dist";

// Link classification patterns used when collecting link targets.
const INTRA_PAGE_LINK = /^#(.*)$/; // starts with # => 1=anchor
// The dot before "html" is escaped so that only a literal ".html" suffix matches.
const TR_SECTION_LINK = /^(tr35(?:[^.]*))\.html(?:#(.*))?$/; // => 1=basename, 2=anchor
const EXTERNAL_LINK = /^(http|https|mailto|ftp):.*$/; // scheme

/**
 * Build the canonical (markdown-relative) form of a link.
 *
 * @param {string} targetSection e.g. 'tr35-info'
 * @param {string} [anchor] e.g. 'Parts'; when empty or omitted only the page is returned
 * @returns {string} e.g. 'tr35-info.md#Parts', or 'tr35-info.md' without an anchor
 */
function constructLink(targetSection, anchor) {
  const page = `${targetSection}.md`;
  if (!anchor) {
    return page;
  }
  return `${page}#${anchor}`;
}

/**
 * Read one rendered .html file, collect every anchor it defines and every
 * internal link it points at, and write the sorted anchor list to
 * `<basename>.anchors.json` in the anchor directory.
 *
 * @param {string} infile path to the input .html file
 * @returns {Promise<[string, string[], string[]]>} the section basename
 *   (file name without '.html'), the sorted anchors defined in the file, and
 *   the sorted internal link targets (in constructLink() form) it references
 */
async function extractAnchors(infile) {
  const basename = path.basename(infile, ".html");
  console.log(`${SECTION_ICON} Reading ${infile}`);
  let html = await fs.readFile(infile, "utf-8");

  // oh the irony of removing a BOM before posting to unicode.org
  // Once decoded, the BOM is the single code unit U+FEFF, so exactly one
  // character is dropped (dropping more would eat the start of the markup).
  if (html.charCodeAt(0) === 0xfeff) {
    html = html.slice(1);
  }

  // Spin up a JSDOM so we can walk the document
  const dom = new JSDOM(html);
  const { document } = dom.window;

  const anchors = new Set();
  const targets = new Set();

  /** Record an anchor name; warn (but keep going) when it was already seen. */
  function addAnchor(n) {
    if (!n) return;
    if (anchors.has(n)) {
      console.error(`${WARN_ICON} ${constructLink(basename)}: Duplicate anchor: #${n}`);
    } else {
      anchors.add(n);
    }
  }

  /** Classify an href and record it as a target when it is an internal link. */
  function addTarget(href) {
    const intraPage = INTRA_PAGE_LINK.exec(href);
    if (intraPage) {
      // same page
      targets.add(constructLink(basename, intraPage[1]));
      return;
    }
    const trSection = TR_SECTION_LINK.exec(href);
    if (trSection) {
      // another page
      targets.add(constructLink(trSection[1], trSection[2]));
      return;
    }
    if (EXTERNAL_LINK.test(href)) {
      // external
      // Do nothing
      // TODO: add to list of external links?
      return;
    }
    // Error on all other links
    console.error(`${WARN_ICON} ${basename}: Unknown anchor: ${href}`);
  }

  // extract anchors: every element's id, plus the legacy name= on <a>
  for (const el of document.getElementsByTagName("*")) {
    addAnchor(el.getAttribute("id"));
    if (el.tagName === "A") {
      addAnchor(el.getAttribute("name"));
    }
  }
  // extract targets
  for (const a of document.getElementsByTagName("A")) {
    const href = a.getAttribute("href");
    if (href) {
      addTarget(href);
    }
  }

  // Locale-neutral collation keeps the generated files stable across machines.
  const coll = new Intl.Collator(["und"]);
  const anchorList = Array.from(anchors.values()).sort(coll.compare);
  const anchorFile = path.join(ANCHOR_DIR, `${basename}.anchors.json`);
  await fs.writeFile(anchorFile, JSON.stringify(anchorList, null, ' '));
  const targetList = Array.from(targets.values()).sort(coll.compare);
  return [basename, anchorList, targetList];
}

/**
 * Run extractAnchors() over every .html file in the HTML directory.
 *
 * @returns {Promise<Array<[string, string[], string[]]>>} one
 *   [basename, anchorList, targetList] entry per processed file
 */
async function extractAll() {
  const fileList = (await fs.readdir(HTML_DIR))
    .filter((f) => /\.html$/.test(f))
    .map((f) => path.join(HTML_DIR, f));
  return Promise.all(fileList.map(extractAnchors));
}

/**
 * Extract anchors and targets from all pages, then verify that every internal
 * link target resolves to a known page or anchor. Broken links are reported on
 * stderr and set process.exitCode to 1.
 *
 * @returns {Promise<string[]>} the basenames of all processed sections
 */
async function checkAll() {
  console.log(`${GEAR_ICON} Reading HTML`);
  const checked = await extractAll();
  console.log(`${GEAR_ICON} Collecting internal links`);

  const allInternalTargets = new Set();
  const allInternalAnchors = new Set();
  // e.g. "tr35-info" => Set(["tr35-keyboards.md#Element_keyboard", …])
  const sectionToTargets = new Map();

  for (const [sourceSection, anchorList, targetList] of checked) {
    allInternalAnchors.add(constructLink(sourceSection)); // example: 'tr35-collation.md'
    for (const target of targetList) {
      allInternalTargets.add(target);
    }
    sectionToTargets.set(sourceSection, new Set(targetList)); // for error checking
    for (const anchor of anchorList) {
      allInternalAnchors.add(constructLink(sourceSection, anchor)); // tr35-collation.md#Parts
    }
  }

  console.log(`${GEAR_ICON} Checking ${allInternalTargets.size} internal links against ${allInternalAnchors.size} anchors`);

  const missingInternalLinks = new Set();
  for (const expectedAnchor of allInternalTargets) {
    if (!allInternalAnchors.has(expectedAnchor)) {
      missingInternalLinks.add(expectedAnchor);
    }
  }

  if (missingInternalLinks.size > 0) {
    for (const expectedAnchor of missingInternalLinks) {
      // coalesce: name every section that links to the missing anchor
      const sourceSections =
        [...sectionToTargets]
          .filter(([, sectionTargets]) => sectionTargets.has(expectedAnchor)) // Does this section target this anchor?
          .map(([section]) => constructLink(section)) // drop the set
          .join(' & ') || // join section name(s)
        '(unknown section(s))'; // error
      console.error(`${MISSING_ICON} Broken internal link: ${sourceSections}: (${expectedAnchor})`);
    }
    console.error(`${WARN_ICON} ${missingInternalLinks.size} missing links.`);
    process.exitCode = 1;
  }

  console.log(`${POINT_ICON} use: 'lychee --cache docs/ldml' to check external links`);

  return checked.map(([section]) => section);
}

checkAll()
  .then((sections) => {
    for (const section of sections) {
      console.log(`${DONE_ICON} ${constructLink(section)}`);
    }
  })
  .catch((e) => {
    // Also reached when the reporting step above throws.
    console.error(e);
    process.exitCode = 1;
  });