xref: /aosp_15_r20/external/cronet/third_party/icu/scripts/euckr_gen.sh (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/bin/sh
2# Copyright 2015 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# References:
7#   https://encoding.spec.whatwg.org/#euc-kr
8
9# This script downloads the following file.
10#   https://encoding.spec.whatwg.org/index-euc-kr.txt
11
# Prints the fixed header of the generated ICU mapping table to stdout:
# the copyright banner, the converter attributes (<code_set_name>,
# <mb_cur_max>, <subchar>, ...), the four <icu:state> rows and the opening
# CHARMAP line.
#
# The here-document delimiter is quoted so the text is emitted verbatim
# (the "\x3F" substitution byte must never be subject to shell expansion),
# and the function uses the POSIX "name()" form because the script's
# interpreter is /bin/sh.
preamble() {
  cat <<'PREAMBLE'
# ***************************************************************************
# *
# *   Copyright (C) 1995-2015, International Business Machines
# *   Corporation and others.  All Rights Reserved.
# *
# *   Generated per the algorithm for EUC-KR
# *   described at http://encoding.spec.whatwg.org/#euc-kr
# *
# ***************************************************************************
<code_set_name>               "euc-kr-html"
<mb_cur_max>                  2
<mb_cur_min>                  1
<uconv_class>                 "MBCS"
<subchar>                     \x3F
<icu:charsetFamily>           "ASCII"

# 81-fe in states 2 and 3 can be tigher and a1-fe, but
# to be compliant to HTML5 spec, it should be 81-fe.
<icu:state>                  0-7f, 81-c5:1, c6:2, c7-fe:3
<icu:state>                  41-5a, 61-7a, 81-fe
<icu:state>                  41-52, 81-fe
<icu:state>                  81-fe

CHARMAP
PREAMBLE
}
40
# Prints one mapping line per ASCII code point (0 through 127) to stdout,
# in the form "<UXXXX> \xNN |0", where both XXXX and NN are the code point
# in upper-case hexadecimal.
#
# Written with a plain counting loop instead of seq(1), which is not part
# of POSIX, and with the POSIX "name()" function form required by /bin/sh.
ascii() {
  i=0
  while [ "$i" -le 127 ]; do
    printf '<U%04X> \\x%02X |0\n' "$i" "$i"
    i=$((i + 1))
  done
}
47
48
# Converts the WHATWG EUC-KR index into two-byte mapping lines on stdout.
#
# Each data line of the index holds a decimal pointer and a "0x"-prefixed
# code point.  The pointer is split into the two EUC-KR bytes:
#   lead  = pointer / 190 + 0x81   (integer division)
#   trail = pointer % 190 + 0x41
# and the line "<UXXXX> \xLL\xTT |0" is printed.
#
# Arguments: $1 - optional path to the index file
#                 (default: index-euc-kr.txt in the current directory)
#
# The byte offsets are spelled in decimal because hexadecimal literals in
# awk program text are a gawk extension and are not understood by every awk.
euckr() {
  awk '
    # Ignore comment lines and anything without a pointer and a code point.
    !/^#/ && NF >= 2 {
      pointer = $1
      ucs = substr($2, 3)                 # strip the leading "0x"
      lead = int(pointer / 190) + 129     # 129 == 0x81
      trail = pointer % 190 + 65          # 65 == 0x41
      printf("<U%4s> \\x%02X\\x%02X |%d\n", ucs, lead, trail, 0)
    }
  ' "${1:-index-euc-kr.txt}"
}
62
# Prints every non-ASCII mapping line in whatever order the generators
# produce them; the caller is responsible for sorting and de-duplicating.
# Currently the only generator is euckr.
#
# Defined with the POSIX "name()" form because the script runs under
# /bin/sh, where the "function" keyword is not guaranteed to exist.
unsorted_table() {
  euckr
}
66
# Fetch the index next to the script's working directory.  Abort when the
# download fails: otherwise a table built from a missing or stale index
# would still be terminated with END CHARMAP and look complete.
if ! wget -N -r -nd https://encoding.spec.whatwg.org/index-euc-kr.txt; then
  echo 'error: could not download index-euc-kr.txt' >&2
  exit 1
fi

# Emit the table: header, ASCII block, then the sorted, de-duplicated
# two-byte mappings.  LC_ALL=C keeps the ordering byte-wise and therefore
# identical regardless of the invoking user's locale.
preamble
ascii
unsorted_table | LC_ALL=C sort -u
echo 'END CHARMAP'
72