#!/usr/bin/env node
'use strict';
/*
Copyright 2018 The Chromium Authors. All rights reserved.
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

This script wraps common HTML transformations including stripping whitespace and
comments from HTML, CSS, and Javascript.
*/
const dom5 = require('dom5');
const escodegen = require('escodegen');
const espree = require('espree');
const fs = require('fs');
const nopt = require('nopt');

// Normalized (trimmed, lower-cased) values of a <script> "type" attribute whose
// content is JavaScript. The empty string stands for a missing or empty
// attribute.
const JAVASCRIPT_TYPES = new Set([
  '',
  'module',
  'text/javascript',
  'application/javascript',
  'text/ecmascript',
  'application/ecmascript',
]);

const isScriptElement = dom5.predicates.hasTagName('script');
const isStyleElement = dom5.predicates.hasTagName('style');
const hasSrcAttribute = dom5.predicates.hasAttr('src');

/**
 * Removes every comment node from the given tree, in place.
 *
 * @param {Object} root - Parsed document, as returned by dom5.parse().
 */
function removeComments(root) {
  for (const comment of dom5.nodeWalkAll(root, dom5.isCommentNode)) {
    dom5.remove(comment);
  }
}

/**
 * Collapses each run of newlines, together with the spaces directly around
 * it, into a single newline.
 *
 * @param {string} text - Content of an HTML text node.
 * @returns {string} The text with blank lines, indentation and trailing
 *     spaces removed.
 */
function collapseWhitespace(text) {
  return text
      .replace(/ *\n+ */g, '\n')
      .replace(/\n+/g, '\n');
}

/**
 * Minifies the content of a <style> element with a few regular expressions.
 *
 * @param {string} css - Stylesheet text.
 * @returns {string} The stylesheet on a single line, without the spaces that
 *     follow `; , : { }` or precede `; , { }`.
 */
function minifyCss(css) {
  return css
      // Line breaks become a space rather than nothing, so that tokens
      // separated only by a line break (`margin: 0\nauto`, `.a\n.b`) are not
      // fused together. The rules below drop the space again wherever it is
      // not needed.
      .replace(/[\r\n]+/g, ' ')
      .replace(/ {2,}/g, ' ')
      .replace(/(^|[;,:{}]) /g, '$1')
      .replace(/ ($|[;,{}])/g, '$1');
}

/**
 * Re-generates the JavaScript of an inline <script> element without
 * indentation. Parsing and re-generating also drops the comments.
 *
 * Scripts whose "type" attribute does not denote JavaScript (JSON data,
 * templates, ...) are not run through the JavaScript parser, which would
 * otherwise throw on them.
 *
 * @param {Object} script - A <script> element without a "src" attribute.
 * @throws {SyntaxError} If the script content is not valid JavaScript.
 */
function minifyInlineScript(script) {
  const type = (dom5.getAttribute(script, 'type') || '').trim().toLowerCase();
  if (!JAVASCRIPT_TYPES.has(type)) {
    return;
  }
  const ast = espree.parse(dom5.getTextContent(script), {
    ecmaVersion: 2018,
    // import/export declarations are only accepted in module code.
    sourceType: type === 'module' ? 'module' : 'script',
  });
  const code = escodegen.generate(ast, {format: {indent: {style: ''}}});
  dom5.setTextContent(script, code);
}

/**
 * Minifies, in place, the HTML file named by the first positional
 * command-line argument.
 */
function main() {
  const filename = nopt().argv.remain[0];
  if (!filename) {
    console.error(
        'Error: expected the path of the HTML file to minify as the first ' +
        'argument.');
    process.exitCode = 1;
    return;
  }

  // First, collapse text nodes around comments (by removing comment nodes,
  // re-serializing, and re-parsing) in order to prevent multiple extraneous
  // newlines.
  const withComments = dom5.parse(fs.readFileSync(filename, 'utf8'));
  removeComments(withComments);
  const parsedHtml = dom5.parse(dom5.serialize(withComments));

  // Some of these transformations are based on polyclean:
  // https://github.com/googlearchive/polyclean
  for (const node of dom5.nodeWalkAll(parsedHtml, () => true)) {
    if (dom5.isTextNode(node)) {
      dom5.setTextContent(node, collapseWhitespace(dom5.getTextContent(node)));
    } else if (isScriptElement(node) && !hasSrcAttribute(node)) {
      minifyInlineScript(node);
    } else if (isStyleElement(node)) {
      dom5.setTextContent(node, minifyCss(dom5.getTextContent(node)));
    }
  }

  fs.writeFileSync(filename, dom5.serialize(parsedHtml));
}

main();