xref: /aosp_15_r20/external/cronet/third_party/icu/scripts/single_byte_gen.sh (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/bin/bash
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
#######################################
# Prints the header of a .ucm mapping file for one single-byte encoding,
# up to and including the opening CHARMAP line.
# Arguments:
#   $1 - encoding name as it appears in the WHATWG index file name
#        (e.g. "koi8-r" for index-koi8-r.txt)
# Outputs:
#   Writes the header to stdout; diagnostics go to stderr.
# Returns:
#   0 on success, 1 if no encoding name was supplied.
#######################################
preamble() {
  # Keep the name local so a call does not create or clobber a global.
  local encoding="${1-}"

  if [[ -z "${encoding}" ]]; then
    echo 'preamble: missing encoding name' >&2
    return 1
  fi

  # Unquoted delimiter: ${encoding} is expanded, while \x3F is emitted
  # literally because a backslash before "x" is not an escape in a here-doc.
  cat <<PREAMBLE
# ***************************************************************************
# *
# *   Generated from index-$encoding.txt (
# *   https://encoding.spec.whatwg.org/index-${encoding}.txt )
# *   following the algorithm for the single byte legacy encoding
# *   described at http://encoding.spec.whatwg.org/#single-byte-decoder
# *
# ***************************************************************************
<code_set_name>               "${encoding}-html"
<char_name_mask>              "AXXXX"
<mb_cur_max>                  1
<mb_cur_min>                  1
<uconv_class>                 "SBCS"
<subchar>                     \x3F
<icu:charsetFamily>           "ASCII"

CHARMAP
PREAMBLE
}
30
31# The list of html5 encodings. Note that iso-8859-8-i is not listed here
32# because its mapping table is exactly the same as iso-8859-8. The difference
33# is BiDi handling (logical vs visual).
# Make a pipeline fail when any stage fails, so an awk error is not masked
# by a successful sort.
set -o pipefail

encodings=(
  ibm866 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5 iso-8859-6
  iso-8859-7 iso-8859-8 iso-8859-10 iso-8859-13 iso-8859-14
  iso-8859-15 iso-8859-16 koi8-r koi8-u macintosh
  windows-874 windows-1250 windows-1251 windows-1252 windows-1253
  windows-1254 windows-1255 windows-1256 windows-1257 windows-1258
  x-mac-cyrillic
)

# Output directory for the generated .ucm files, relative to this script.
ENCODING_DIR="$(dirname -- "$0")/../source/data/mappings"
if [[ ! -d "${ENCODING_DIR}" ]]; then
  printf 'error: mapping directory not found: %s\n' "${ENCODING_DIR}" >&2
  exit 1
fi

# Downloaded index files go into a private scratch directory that is removed
# on every exit path, instead of predictable names in the current directory.
index_dir="$(mktemp -d)" || {
  echo 'error: cannot create temporary directory' >&2
  exit 1
}
trap 'rm -rf -- "${index_dir}"' EXIT

#######################################
# Prints the sorted CHARMAP rows for one downloaded index file.
# Bytes 0x00-0x7F map to the identical code point; every non-comment,
# non-blank index line "<pointer> 0x<code point> ..." maps byte
# (pointer + 128) to that code point.
# Arguments:
#   $1 - path to the index file
# Outputs:
#   Writes the rows to stdout.
#######################################
charmap_rows() {
  local index_file="$1"

  # Decimal 128 is used rather than 0x80: hexadecimal constants in awk
  # program text are not portable across awk implementations.
  # LC_ALL=C keeps the row order independent of the caller's locale.
  awk '
    BEGIN {
      for (i = 0; i < 128; ++i) {
        printf("<U%04X> \\x%02X |0\n", i, i)
      }
    }
    !/^#/ && !/^$/ {
      printf("<U%4s> \\x%02X |0\n", substr($2, 3), $1 + 128)
    }
  ' "${index_file}" | LC_ALL=C sort
}

for e in "${encodings[@]}"; do
  output="${ENCODING_DIR}/${e}-html.ucm"
  index="${index_dir}/index-${e}.txt"
  indexurl="https://encoding.spec.whatwg.org/index-${e}.txt"

  # --fail makes curl return non-zero on an HTTP error instead of saving the
  # server's error page, which would otherwise be converted into a mapping.
  if ! curl --fail -o "${index}" "${indexurl}"; then
    printf 'error: failed to download %s\n' "${indexurl}" >&2
    exit 1
  fi

  # Header, rows and trailer are written through a single redirection; the
  # && chain stops at the first failing step so the failure is reported.
  {
    preamble "${e}" \
      && charmap_rows "${index}" \
      && echo 'END CHARMAP'
  } > "${output}" || {
    printf 'error: failed to generate %s\n' "${output}" >&2
    exit 1
  }
done
62
63