#!/bin/sh
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Generates an ICU .ucm charmap for EUC-KR on stdout.
#
# References:
#   https://encoding.spec.whatwg.org/#euc-kr
#
# This script downloads the following file into the current directory:
#   https://encoding.spec.whatwg.org/index-euc-kr.txt
#
# Written for POSIX sh (no 'function' keyword, no 'seq', no awk hex
# literals) so that it behaves the same under dash, bash, gawk and mawk.

INDEX_URL='https://encoding.spec.whatwg.org/index-euc-kr.txt'
INDEX_FILE='index-euc-kr.txt'

# Prints the fixed .ucm header: license banner, converter metadata and the
# MBCS state table, followed by the opening CHARMAP line.
# The heredoc delimiter is quoted so the text is emitted verbatim.
preamble() {
  cat <<'PREAMBLE'
# ***************************************************************************
# *
# * Copyright (C) 1995-2015, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# * Generated per the algorithm for EUC-KR
# * described at http://encoding.spec.whatwg.org/#euc-kr
# *
# ***************************************************************************
<code_set_name> "euc-kr-html"
<mb_cur_max> 2
<mb_cur_min> 1
<uconv_class> "MBCS"
<subchar> \x3F
<icu:charsetFamily> "ASCII"

# 81-fe in states 2 and 3 can be tigher and a1-fe, but
# to be compliant to HTML5 spec, it should be 81-fe.
<icu:state> 0-7f, 81-c5:1, c6:2, c7-fe:3
<icu:state> 41-5a, 61-7a, 81-fe
<icu:state> 41-52, 81-fe
<icu:state> 81-fe

CHARMAP
PREAMBLE
}

# Prints one round-trip (|0) mapping line for each code point
# U+0000..U+007F, mapping it to the single byte of the same value.
ascii() {
  i=0
  while [ "$i" -le 127 ]; do
    printf '<U%04X> \\x%02X |0\n' "$i" "$i"
    i=$((i + 1))
  done
}

# Converts every data line of the index file ("pointer 0xUUUU ...") into a
# two-byte mapping line. Comment lines (starting with '#') and empty lines
# are skipped. The bytes are derived from the pointer as
#   lead  = floor(pointer / 190) + 0x81
#   trail = pointer mod 190 + 0x41
# Decimal constants are used because hex literals in awk source are a
# gawk extension, and int() makes the integer division explicit.
euckr() {
  awk '
    !/^#/ && !/^$/ {
      pointer = $1
      ucs = substr($2, 3)                 # drop the leading "0x"
      lead = int(pointer / 190) + 129     # 129 == 0x81
      trail = pointer % 190 + 65          # 65 == 0x41
      tag = 0
      printf("<U%4s> \\x%02X\\x%02X |%d\n", ucs, lead, trail, tag)
    }
  ' "$INDEX_FILE"
}

# Emits all multi-byte mapping lines, in index order (not yet sorted).
unsorted_table() {
  euckr
}

# Refreshes the local copy of the index file with wget.
# A failed download is only a warning as long as a readable local copy
# exists; returns non-zero when no usable index file is available.
fetch_index() {
  if ! wget -N -r -nd "$INDEX_URL"; then
    printf 'warning: could not download %s\n' "$INDEX_URL" >&2
  fi
  if [ ! -r "$INDEX_FILE" ]; then
    printf 'error: %s is missing or unreadable\n' "$INDEX_FILE" >&2
    return 1
  fi
}

# Writes the complete .ucm file to stdout. Nothing is written when the
# index file is unavailable, so a truncated charmap is never produced.
main() {
  fetch_index || return 1
  preamble
  ascii
  # LC_ALL=C gives a byte-wise, locale-independent (reproducible) order.
  unsorted_table | LC_ALL=C sort -k1 | uniq
  echo 'END CHARMAP'
}

# Last command on purpose: the script's exit status is main's status.
main "$@"