// xref: /aosp_15_r20/external/cldr/tools/scripts/tr-archive/archive.js (revision 912701f9769bb47905792267661f0baf2b85bed5)
const fs = require("fs").promises;
const { marked } = require("marked");
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const path = require("path");
const markedAlert = require('marked-alert');

// Set up the options for our markdown renderer.
// NOTE(review): several of these options (highlight, sanitize, smartLists,
// smartypants, xhtml) may no longer exist in newer marked releases - confirm
// against the marked version this tool is pinned to.
marked.setOptions({
  renderer: new marked.Renderer(),

  // Add a code highlighter
  highlight: function (code, forlanguage) {
    const hljs = require("highlight.js");
    // Fall back to plain text when highlight.js does not know the language.
    // Declared locally so that no implicit global is created.
    const language = hljs.getLanguage(forlanguage) ? forlanguage : "plaintext";
    return hljs.highlight(code, { language }).value;
  },
  pedantic: false,
  gfm: true,
  breaks: false,
  sanitize: false,
  smartLists: true,
  smartypants: false,
  xhtml: false,
});

// Register the marked-alert extension with the renderer.
marked.use(markedAlert());

/**
 * Drop a leading byte order mark (U+FEFF) from decoded text.
 * In a decoded string the BOM is a single code unit, so exactly one
 * character is removed.
 * @param {string} text decoded file content
 * @returns {string} the text without a leading BOM
 */
function stripBom(text) {
  return text.charCodeAt(0) === 0xfeff ? text.substring(1) : text;
}

/**
 * Rewrite a link target of the form ….md or ….md#… to ….html or ….html#…
 * @param {string|null} href value of an href attribute
 * @returns {string|null} the rewritten target, or null when href does not
 * point at a .md file and must be left alone
 */
function mdHrefToHtml(href) {
  let m;
  if ((m = /^(.*)\.md#(.*)$/.exec(href))) {
    return `${m[1]}.html#${m[2]}`;
  }
  if ((m = /^(.*)\.md$/.exec(href))) {
    return `${m[1]}.html`;
  }
  return null;
}

/**
 * Read the input .md file, and write to a corresponding .html file
 * (same directory, same base name).
 * @param {string} infile path to input file
 * @returns {Promise<string>} name of output file (for status update)
 * @throws {Error} if a <title> has to be generated but the rendered document has no <h1>
 */
async function renderit(infile) {
  console.log(`Reading ${infile}`);
  const basename = path.basename(infile, ".md");
  const outfile = path.join(path.dirname(infile), `${basename}.html`);

  // oh the irony of removing a BOM before posting to unicode.org
  const f1 = stripBom(await fs.readFile(infile, "utf-8"));

  // render to HTML
  const rawHtml = marked(f1);

  // now fix. Spin up a JSDOM so we can manipulate
  const dom = new JSDOM(rawHtml);
  const document = dom.window.document;

  // First the HEAD
  const head = document.getElementsByTagName("head")[0];

  // add CSS to HEAD
  head.innerHTML =
    head.innerHTML +
    `<meta charset="utf-8">\n` +
    `<link rel='stylesheet' type='text/css' media='screen' href='../reports-v2.css'>\n` +
    `<link rel='stylesheet' type='text/css' media='screen' href='tr35.css'>\n`;

  // Add a title, taken from the first <h1>, unless there already is one.
  if (document.getElementsByTagName("title").length >= 1) {
    console.log("Already had a <title>… not changing.");
  } else {
    const firstH1 = document.getElementsByTagName("h1")[0];
    if (!firstH1) {
      throw new Error(`${infile}: no <h1> found to use as the <title>`);
    }
    const title = document.createElement("title");
    const first_h1_text = firstH1.textContent.replace(')Part', ') Part');
    title.appendChild(document.createTextNode(first_h1_text));
    head.appendChild(title);
  }

  // calculate the header object
  const header = document.createElement("div");
  header.setAttribute("class", "header");

  // taken from prior TRs, read from the header in 'header.html'
  header.innerHTML = (await fs.readFile('header.html', 'utf-8')).trim();

  // Move all nodes out of the top level body and into a subelement
  // The subelement is <div class="body"/>
  const body = document.getElementsByTagName("body")[0];
  const div = document.createElement("div");
  div.setAttribute("class", "body");
  let sawFirstTable = false;
  // body.childNodes is live: iterate over a snapshot, otherwise every removal
  // shifts the list and every other node would be skipped and left behind.
  for (const e of Array.from(body.childNodes)) {
    body.removeChild(e);
    if (div.childNodes.length === 0 && e.tagName === 'P') {
      // update title element to <h2 class="uaxtitle"/>
      const newTitle = document.createElement('h2');
      newTitle.setAttribute("class", "uaxtitle");
      newTitle.appendChild(document.createTextNode(e.textContent));
      div.appendChild(newTitle);
    } else {
      if (!sawFirstTable && e.tagName === 'TABLE') {
        // Update first table to simple width=90%
        // The first table is the document header (Author, etc.)
        e.setAttribute("class", "simple");
        e.setAttribute("width", "90%");
        sawFirstTable = true;
      }
      div.appendChild(e);
    }
  }

  /**
   * create a <SCRIPT/> object.
   * Choose ONE of src or code.
   * @param {Object} obj
   * @param {string} obj.src source of script as url
   * @param {string} obj.code code for script as text
   * @returns the new script element (not yet attached to the document)
   */
  function getScript({ src, code }) {
    const script = document.createElement("script");
    if (src) {
      script.setAttribute("src", src);
    }
    if (code) {
      script.appendChild(document.createTextNode(code));
    }
    return script;
  }

  // body has no content at this point.
  // Add all the pieces back.
  body.appendChild(getScript({ src: './js/anchor.min.js' }));
  body.appendChild(header);
  body.appendChild(div);

  // now, fix all links from  ….md#…  to ….html#…
  for (const e of document.getElementsByTagName("a")) {
    const fixedHref = mdHrefToHtml(e.getAttribute("href"));
    if (fixedHref !== null) {
      e.setAttribute("href", fixedHref);
    }
  }

  // put this last
  body.appendChild(getScript({
    // This invokes anchor.js
    code: `anchors.add('h1, h2, h3, h4, h5, h6, caption, dfn');`
  }));

  // Now, fixup captions
  // Look for:  <h6>Table: …</h6> followed by <table>…</table>
  // Move the h6 inside the table, but as <caption/>
  const h6es = document.getElementsByTagName("h6");
  // Removal is deferred so the live h6 collection is not changed while iterating.
  const toRemove = [];
  for (const h6 of h6es) {
    if (!h6.innerHTML.startsWith("Table: ")) {
      console.error(`Does not start with Table: ${h6.innerHTML}`);
      continue; // no 'Table:' marker.
    }
    const next = h6.nextElementSibling;
    // next is null when the h6 is the last element of its parent.
    if (!next || next.tagName !== 'TABLE') {
      console.error(`Not a following table for ${h6.innerHTML}`);
      continue; // Next item is not a table. Maybe a PRE or something.
    }
    const caption = document.createElement("caption");
    for (const e of h6.childNodes) {
      caption.appendChild(e.cloneNode(true));
    }
    // h6.attributes is live: copy from a snapshot so that removing an
    // attribute does not make the loop skip the one after it.
    for (const p of Array.from(h6.attributes)) {
      caption.setAttribute(p.name, p.value);
      h6.removeAttribute(p.name); // so that it does not have a conflicting id
    }
    next.prepend(caption);
    toRemove.push(h6);
  }
  for (const h6 of toRemove) {
    h6.remove();
  }

  // Drop generated anchors where there is an explicit anchor
  const anchors = document.getElementsByTagName("a");
  for (const a of anchors) {
    // a needs to have a name
    const aname = a.getAttribute('name');
    if (!aname) continue;
    // parent needs to have a single child element and its own 'id'.
    const parent = a.parentElement;
    if (parent.childElementCount !== 1) continue;
    const parid = parent.getAttribute('id');
    if (!parid) continue;
    // Criteria met. swap the name and id
    parent.setAttribute('id', aname);
    a.setAttribute('name', parid);
  }

  // OK, done munging the DOM, write it out.
  console.log(`Writing ${outfile}`);

  // TODO: we assume that DOCTYPE is not written.
  await fs.writeFile(outfile, `<!DOCTYPE html>\n` + dom.serialize());
  return outfile;
}

/**
 * Convert all .md files found directly in a directory.
 * Each file is handed to renderit(); the conversions run concurrently.
 * @param {string} [outbox="./dist"] directory that is scanned for .md files
 * @returns {Promise<string[]>} list of output files
 */
async function fixall(outbox = "./dist") {
  // TODO: move source file copy into JavaScript?
  // srcbox = '../../../docs/ldml';

  const entries = await fs.readdir(outbox);
  const fileList = entries
    .filter((f) => /\.md$/.test(f))
    .map((f) => path.join(outbox, f));
  // Call through an arrow function so that only the path is passed on,
  // not the index and array that map() also supplies.
  return Promise.all(fileList.map((f) => renderit(f)));
}

// Run the conversion. On success, print the list of written files;
// on failure, log the error and report it through the process exit code.
fixall().then(
  function reportOutputs(outputs) {
    console.dir(outputs);
  },
  function reportFailure(err) {
    console.error(err);
    process.exitCode = 1;
  }
);