#!/bin/bash
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Regenerates the <encoding>-html.ucm mapping files for the single-byte
# legacy encodings from the index-<encoding>.txt tables published at
# https://encoding.spec.whatwg.org/.
#
# Usage: run with no arguments. Output is written to
# ../source/data/mappings relative to the directory containing this script.
# Requires curl, awk and sort on PATH plus network access.

set -euo pipefail

#######################################
# Prints a message to stderr and aborts the script.
# Arguments: $@ - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Writes the .ucm header for one encoding, up to and including the
# CHARMAP line.
# Arguments: $1 - encoding name as used in the index file name
# Outputs:   header text on stdout
#######################################
preamble() {
  local encoding="$1"
  # Unquoted delimiter on purpose: $encoding must expand inside the header.
  # "\x3F" stays literal because a backslash before "x" is not an escape
  # inside a here-document.
  cat <<PREAMBLE
# ***************************************************************************
# *
# * Generated from index-$encoding.txt (
# * https://encoding.spec.whatwg.org/index-${encoding}.txt )
# * following the algorithm for the single byte legacy encoding
# * described at http://encoding.spec.whatwg.org/#single-byte-decoder
# *
# ***************************************************************************
<code_set_name> "${encoding}-html"
<char_name_mask> "AXXXX"
<mb_cur_max> 1
<mb_cur_min> 1
<uconv_class> "SBCS"
<subchar> \x3F
<icu:charsetFamily> "ASCII"

CHARMAP
PREAMBLE
}

#######################################
# Converts a downloaded index file into sorted CHARMAP entries.
# Arguments: $1 - path to the index-<encoding>.txt file
# Outputs:   one "<Uxxxx> \xNN |0" line per mapping on stdout
#######################################
emit_charmap() {
  # Bytes 0..127 map to the identical code point. Every non-comment,
  # non-empty index line supplies (offset, 0xXXXX); the byte is offset + 128
  # and the code point is field 2 with its two-character prefix removed.
  # Decimal 128 is used instead of 0x80 because hexadecimal constants in awk
  # program text are an extension that not every awk implements.
  # LC_ALL=C keeps the ordering independent of the caller's locale.
  awk '
    BEGIN {
      for (i = 0; i < 128; ++i)
        printf("<U%04X> \\x%02X |0\n", i, i)
    }
    !/^#/ && !/^$/ {
      printf("<U%4s> \\x%02X |0\n", substr($2, 3), $1 + 128)
    }
  ' "$1" | LC_ALL=C sort
}

# The list of html5 encodings. Note that iso-8859-8-i is not listed here
# because its mapping table is exactly the same as iso-8859-8. The difference
# is BiDi handling (logical vs visual).
encodings=(
  ibm866 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6
  iso-8859-7 iso-8859-8 iso-8859-10 iso-8859-13 iso-8859-14
  iso-8859-15 iso-8859-16 koi8-r koi8-u macintosh
  windows-874 windows-1250 windows-1251 windows-1252 windows-1253
  windows-1254 windows-1255 windows-1256 windows-1257 windows-1258
  x-mac-cyrillic
)

ENCODING_DIR="$(dirname -- "$0")/../source/data/mappings"
readonly ENCODING_DIR
[[ -d "${ENCODING_DIR}" ]] || die "output directory not found: ${ENCODING_DIR}"

# Downloads and half-built outputs live in a private scratch directory so a
# failure never leaves stray files behind or clobbers an existing mapping.
work_dir="$(mktemp -d)" || die "mktemp failed"
trap 'rm -rf -- "${work_dir}"' EXIT

for e in "${encodings[@]}"; do
  output="${ENCODING_DIR}/${e}-html.ucm"
  index="${work_dir}/index-${e}.txt"
  staged="${work_dir}/${e}-html.ucm"
  indexurl="https://encoding.spec.whatwg.org/index-${e}.txt"

  # --fail turns HTTP errors into a non-zero exit instead of saving the
  # server's error page as if it were the index.
  curl --fail -o "${index}" "${indexurl}" \
    || die "failed to download ${indexurl}"

  {
    preamble "${e}"
    emit_charmap "${index}"
    echo 'END CHARMAP'
  } > "${staged}"

  # Replace the real file only once the new content is complete.
  mv -- "${staged}" "${output}"
  rm -f -- "${index}"
done