xref: /aosp_15_r20/external/harfbuzz_ng/src/gen-indic-table.py (revision 2d1272b857b1f7575e6e246373e1cb218663db8a)
1#!/usr/bin/env python3
2
3"""usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt
4
5Input files:
6* https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
7* https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
8* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
9"""
10
11import sys
12
# The script takes exactly three input files.  With any other argument count,
# stop and hand the usage text (the module docstring) to the interpreter's
# exit machinery.
if len(sys.argv[1:]) != 3:
    raise SystemExit(__doc__)
15
# Code points that are kept individually, whatever block they belong to.
ALLOWED_SINGLES = [
    0x00A0,  # NO-BREAK SPACE
    0x25CC,  # DOTTED CIRCLE
]

# Blocks (spelled as in Blocks.txt) whose code points are kept.
ALLOWED_BLOCKS = [
    'Basic Latin', 'Latin-1 Supplement',
    'Devanagari', 'Bengali', 'Gurmukhi', 'Gujarati', 'Oriya',
    'Tamil', 'Telugu', 'Kannada', 'Malayalam',
    'Myanmar', 'Khmer',
    'Vedic Extensions', 'General Punctuation', 'Superscripts and Subscripts',
    'Devanagari Extended',
    'Myanmar Extended-B', 'Myanmar Extended-A', 'Myanmar Extended-C',
]
39
# Parse the three UCD data files named on the command line.
#
#   headers[i]       the first two lines of file i, kept verbatim so they can
#                    be echoed into the generated table's banner comment.
#   unicode_data[i]  {code point: value of the second ';'-separated field}
#                    for every code point (or 'XXXX..YYYY' range) in file i.
#
# The handles are closed as soon as parsing finishes (or fails) instead of
# being left open for the remainder of the run.
files = []
try:
    for path in sys.argv[1:]:
        files.append(open(path, encoding='utf-8'))

    headers = [[f.readline() for _ in range(2)] for f in files]

    unicode_data = [{} for _ in files]
    for i, f in enumerate(files):
        for line in f:
            # Drop a trailing '#' comment, if any.
            j = line.find('#')
            if j >= 0:
                line = line[:j]

            fields = [x.strip() for x in line.split(';')]
            if len(fields) == 1:
                # No ';' on the line: blank or comment-only.
                continue

            # The first field is one code point or an inclusive range.
            uu = fields[0].split('..')
            start = int(uu[0], 16)
            end = start if len(uu) == 1 else int(uu[1], 16)

            t = fields[1]
            for u in range(start, end + 1):
                unicode_data[i][u] = t
finally:
    for f in files:
        f.close()
67
# Merge data into one dict:
#   code point -> [value from file 0, value from file 1, value from file 2]
# (per the usage text: syllabic category, positional category, block name).
# Columns a file does not mention keep the matching entry of `defaults`.
defaults = ('Other', 'Not_Applicable', 'No_Block')
combined = {}
for column, table in enumerate(unicode_data):
    for cp, value in table.items():
        if cp in combined:
            combined[cp][column] = value
        elif column != 2:
            # First sighting of this code point.  The third file (column 2)
            # only annotates code points the first two already introduced,
            # so it never creates entries of its own.
            row = list(defaults)
            row[column] = value
            combined[cp] = row

# Keep only the individually allowed code points and the allowed blocks.
combined = {
    cp: row
    for cp, row in combined.items()
    if cp in ALLOWED_SINGLES or row[2] in ALLOWED_BLOCKS
}
79
80
81# Convert categories & positions types
82
# Category names per shaper, keyed by shaper name.  Both the key order and the
# order within each list are significant to the code that prints the OT_*
# macros from this table.
categories = {
    "indic": [
        "X", "C", "V", "N", "H",
        "ZWNJ", "ZWJ",
        "M", "SM", "A", "VD",
        "PLACEHOLDER", "DOTTEDCIRCLE",
        "RS", "MPst", "Repha", "Ra", "CM", "Symbol", "CS",
    ],
    "khmer": [
        "VAbv", "VBlw", "VPre", "VPst",
        "Robatic", "Xgroup", "Ygroup",
    ],
    "myanmar": [
        "VAbv", "VBlw", "VPre", "VPst",
        "IV", "As", "DB", "GB",
        "MH", "MR", "MW", "MY",
        "PT", "VS", "ML",
    ],
}
135
# Maps each category name found in the syllabic-category input to the
# category name this script works with from here on.
category_map = {
    "Other": "X",
    "Avagraha": "Symbol",
    "Bindu": "SM",
    "Brahmi_Joining_Number": "PLACEHOLDER",  # Don't care.
    "Cantillation_Mark": "A",
    "Consonant": "C",
    "Consonant_Dead": "C",
    "Consonant_Final": "CM",
    "Consonant_Head_Letter": "C",
    "Consonant_Initial_Postfixed": "C",  # TODO
    "Consonant_Killer": "M",  # U+17CD only.
    "Consonant_Medial": "CM",
    "Consonant_Placeholder": "PLACEHOLDER",
    "Consonant_Preceding_Repha": "Repha",
    "Consonant_Prefixed": "X",  # Don't care.
    "Consonant_Subjoined": "CM",
    "Consonant_Succeeding_Repha": "CM",
    "Consonant_With_Stacker": "CS",
    "Gemination_Mark": "SM",  # https://github.com/harfbuzz/harfbuzz/issues/552
    "Invisible_Stacker": "H",
    "Joiner": "ZWJ",
    "Modifying_Letter": "X",
    "Non_Joiner": "ZWNJ",
    "Nukta": "N",
    "Number": "PLACEHOLDER",
    "Number_Joiner": "PLACEHOLDER",  # Don't care.
    "Pure_Killer": "M",  # Is like a vowel matra.
    "Register_Shifter": "RS",
    "Syllable_Modifier": "SM",
    "Tone_Letter": "X",
    "Tone_Mark": "N",
    "Virama": "H",
    "Visarga": "SM",
    "Vowel": "V",
    "Vowel_Dependent": "M",
    "Vowel_Independent": "V",
}
# Maps each position name found in the positional-category input to the
# position name this script works with from here on.
position_map = {
    "Not_Applicable": "END",

    "Left": "PRE_C",
    "Top": "ABOVE_C",
    "Bottom": "BELOW_C",
    "Right": "POST_C",

    # These should resolve to the position of the last part of the split sequence.
    "Bottom_And_Right": "POST_C",
    "Left_And_Right": "POST_C",
    "Top_And_Bottom": "BELOW_C",
    "Top_And_Bottom_And_Left": "BELOW_C",
    "Top_And_Bottom_And_Right": "POST_C",
    "Top_And_Left": "ABOVE_C",
    "Top_And_Left_And_Right": "POST_C",
    "Top_And_Right": "POST_C",

    "Overstruck": "AFTER_MAIN",
    "Visual_order_left": "PRE_M",
}
195
# Per-code-point category overrides.  Runs of consecutive code points that
# share a category are written as inclusive ranges via dict.fromkeys();
# everything else is listed individually.  Insertion order follows the
# grouping below.
category_overrides = {

    # These are the variation-selectors. They only appear in the Myanmar grammar
    # but are not Myanmar-specific
    **dict.fromkeys(range(0xFE00, 0xFE0F + 1), 'VS'),

    # These appear in the OT Myanmar spec, but are not Myanmar-specific
    0x2015: 'PLACEHOLDER',
    0x2022: 'PLACEHOLDER',
    **dict.fromkeys(range(0x25FB, 0x25FE + 1), 'PLACEHOLDER'),


    # Indic

    0x0930: 'Ra',  # Devanagari
    0x09B0: 'Ra',  # Bengali
    0x09F0: 'Ra',  # Bengali
    0x0A30: 'Ra',  # Gurmukhi   No Reph
    0x0AB0: 'Ra',  # Gujarati
    0x0B30: 'Ra',  # Oriya
    0x0BB0: 'Ra',  # Tamil      No Reph
    0x0C30: 'Ra',  # Telugu     Reph formed only with ZWJ
    0x0CB0: 'Ra',  # Kannada
    0x0D30: 'Ra',  # Malayalam  No Reph, Logical Repha

    # The following act more like the Bindus.
    0x0953: 'SM',
    0x0954: 'SM',

    # U+0A40 GURMUKHI VOWEL SIGN II may be preceded by U+0A02 GURMUKHI SIGN BINDI.
    0x0A40: 'MPst',

    # The following act like consonants.
    0x0A72: 'C',
    0x0A73: 'C',
    0x1CF5: 'C',
    0x1CF6: 'C',

    # TODO: The following should only be allowed after a Visarga.
    # For now, just treat them like regular tone marks.
    **dict.fromkeys(range(0x1CE2, 0x1CE8 + 1), 'A'),

    # TODO: The following should only be allowed after some of
    # the nasalization marks, maybe only for U+1CE9..U+1CF1.
    # For now, just treat them like tone marks.
    0x1CED: 'A',

    # The following take marks in standalone clusters, similar to Avagraha.
    **dict.fromkeys(range(0xA8F2, 0xA8F7 + 1), 'Symbol'),
    **dict.fromkeys(range(0x1CE9, 0x1CEC + 1), 'Symbol'),
    **dict.fromkeys(range(0x1CEE, 0x1CF1 + 1), 'Symbol'),

    0x0A51: 'M',  # https://github.com/harfbuzz/harfbuzz/issues/524

    # According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
    # so the Indic shaper needs to know their categories.
    **dict.fromkeys(range(0x11301, 0x11303 + 1), 'SM'),
    0x1133B: 'N',
    0x1133C: 'N',

    0x0AFB: 'N',  # https://github.com/harfbuzz/harfbuzz/issues/552
    0x0B55: 'N',  # https://github.com/harfbuzz/harfbuzz/issues/2849

    0x09FC: 'PLACEHOLDER',  # https://github.com/harfbuzz/harfbuzz/pull/1613
    0x0C80: 'PLACEHOLDER',  # https://github.com/harfbuzz/harfbuzz/pull/623
    0x0D04: 'PLACEHOLDER',  # https://github.com/harfbuzz/harfbuzz/pull/3511

    0x25CC: 'DOTTEDCIRCLE',


    # Khmer

    0x179A: 'Ra',

    0x17CC: 'Robatic',
    0x17C9: 'Robatic',
    0x17CA: 'Robatic',

    0x17C6: 'Xgroup',
    0x17CB: 'Xgroup',
    **dict.fromkeys(range(0x17CD, 0x17D1 + 1), 'Xgroup'),

    0x17C7: 'Ygroup',
    0x17C8: 'Ygroup',
    0x17DD: 'Ygroup',
    0x17D3: 'Ygroup',  # Just guessing. Uniscribe doesn't categorize it.

    0x17D9: 'PLACEHOLDER',  # https://github.com/harfbuzz/harfbuzz/issues/2384


    # Myanmar

    # https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze

    0x104E: 'C',  # The spec says C, IndicSyllableCategory says Consonant_Placeholder

    0x1004: 'Ra',
    0x101B: 'Ra',
    0x105A: 'Ra',

    0x1032: 'A',
    0x1036: 'A',

    0x103A: 'As',

    #0x1040: 'D0', # XXX The spec says D0, but Uniscribe doesn't seem to do.

    0x103E: 'MH',
    0x1060: 'ML',
    0x103C: 'MR',
    0x103D: 'MW',
    0x1082: 'MW',
    0x103B: 'MY',
    0x105E: 'MY',
    0x105F: 'MY',

    0x1063: 'PT',
    0x1064: 'PT',
    **dict.fromkeys(range(0x1069, 0x106D + 1), 'PT'),
    0xAA7B: 'PT',

    0x1038: 'SM',
    **dict.fromkeys(range(0x1087, 0x108D + 1), 'SM'),
    0x108F: 'SM',
    **dict.fromkeys(range(0x109A, 0x109C + 1), 'SM'),

    0x104A: 'PLACEHOLDER',
}
# Per-code-point position overrides.
position_overrides = {
    0x0A51: "BELOW_C",  # https://github.com/harfbuzz/harfbuzz/issues/524
    0x0B01: "BEFORE_SUB",  # Oriya Bindu is BeforeSub in the spec.
}
383
def matra_pos_left(u, block):
    """Return the position for a left-side matra.

    The answer is 'PRE_M' for every code point and block; both arguments are
    accepted only so the signature matches the other matra_pos_* helpers.
    """
    return 'PRE_M'
def matra_pos_right(u, block):
    """Return the position for a right-side matra.

    u     -- code point of the matra; only consulted for Telugu and Kannada.
    block -- name of the block the code point belongs to.

    Telugu and Kannada split on the code point, six blocks always give
    'AFTER_POST', and Devanagari plus every other block give 'AFTER_SUB'.
    """
    if block == 'Telugu':
        return 'AFTER_SUB' if u > 0x0C42 else 'BEFORE_SUB'
    if block == 'Kannada':
        return 'AFTER_SUB' if 0x0CC3 <= u <= 0x0CD6 else 'BEFORE_SUB'
    if block in ('Bengali', 'Gurmukhi', 'Gujarati', 'Oriya', 'Tamil', 'Malayalam'):
        return 'AFTER_POST'
    return 'AFTER_SUB'
def matra_pos_top(u, block):
    """Return the position for a top matra, chosen by block name alone.

    u is accepted for signature parity with the other matra_pos_* helpers and
    is not consulted.
    """
    # BENG and MLYM don't have top matras.
    if block in ('Telugu', 'Kannada'):
        return 'BEFORE_SUB'
    if block == 'Gurmukhi':
        return 'AFTER_POST'  # Deviate from spec
    if block == 'Oriya':
        return 'AFTER_MAIN'
    # Devanagari, Gujarati, Tamil and every other block.
    return 'AFTER_SUB'
def matra_pos_bottom(u, block):
    """Return the position for a bottom matra, chosen by block name alone.

    u is accepted for signature parity with the other matra_pos_* helpers and
    is not consulted.
    """
    if block in ('Gurmukhi', 'Gujarati', 'Tamil', 'Malayalam'):
        return 'AFTER_POST'
    if block in ('Telugu', 'Kannada'):
        return 'BEFORE_SUB'
    # Devanagari, Bengali, Oriya and every other block.
    return 'AFTER_SUB'
def indic_matra_position(u, pos, block): # Reposition matra
    """Return the repositioned slot for a matra.

    u     -- code point of the matra.
    pos   -- one of 'PRE_C', 'POST_C', 'ABOVE_C' or 'BELOW_C'.
    block -- name of the block the code point belongs to.

    Dispatches to the matra_pos_* helper matching pos.

    Raises AssertionError for any other pos.  The error is raised explicitly
    rather than through an assert statement, so it still fires when Python
    runs with -O instead of silently returning None.
    """
    if pos == 'PRE_C':
        return matra_pos_left(u, block)
    if pos == 'POST_C':
        return matra_pos_right(u, block)
    if pos == 'ABOVE_C':
        return matra_pos_top(u, block)
    if pos == 'BELOW_C':
        return matra_pos_bottom(u, block)
    raise AssertionError('unexpected matra position %r for code point %r in block %r'
                         % (pos, u, block))
424
def position_to_category(pos):
    """Return the vowel category name that corresponds to a base-relative position.

    'PRE_C' -> 'VPre', 'ABOVE_C' -> 'VAbv', 'BELOW_C' -> 'VBlw',
    'POST_C' -> 'VPst'.

    Raises AssertionError for any other pos.  The error is raised explicitly
    rather than through an assert statement, so it still fires when Python
    runs with -O instead of silently returning None.
    """
    if pos == 'PRE_C':
        return 'VPre'
    if pos == 'ABOVE_C':
        return 'VAbv'
    if pos == 'BELOW_C':
        return 'VBlw'
    if pos == 'POST_C':
        return 'VPst'
    raise AssertionError('no category corresponds to position %r' % (pos,))
431
432
# Translate the default triple through the same maps as the real data, so it
# can stand in for code points that have no entry of their own.
defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2])

# code point -> (category, position, block name), both names already mapped.
indic_data = {
    cp: (category_map[raw_cat], position_map[raw_pos], block_name)
    for cp, (raw_cat, raw_pos, block_name) in combined.items()
}

# Category overrides replace the category, keep whatever position the code
# point already had (or the default one), and take the block name straight
# from the third input file.
for cp, forced_cat in category_overrides.items():
    _, kept_pos, _ = indic_data.get(cp, defaults)
    indic_data[cp] = (forced_cat, kept_pos, unicode_data[2][cp])
444
# We only expect position for certain types; everything else is reset to 'END'.
positioned_categories = ('CM', 'SM', 'RS', 'H', 'M', 'MPst')
for cp, (cat, _unused_pos, block) in indic_data.items():
    if cat in positioned_categories:
        continue
    indic_data[cp] = (cat, 'END', block)

# Position overrides are more complicated

# Keep in sync with CONSONANT_FLAGS in the shaper
consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
matra_categories = ('M', 'MPst')
smvd_categories = ('SM', 'VD', 'A', 'Symbol')
for cp, (cat, pos, block) in indic_data.items():
    if cat in consonant_categories:
        pos = 'BASE_C'
    elif cat in smvd_categories:
        pos = 'SMVD'
    elif cat in matra_categories:
        if block.startswith(('Khmer', 'Myanmar')):
            # In these blocks the position is folded into the category and
            # the position itself is left as it was.
            cat = position_to_category(pos)
        else:
            pos = indic_matra_position(cp, pos, block)
    indic_data[cp] = (cat, pos, block)

# Explicit position overrides win over everything computed above; the block
# name is taken straight from the third input file.
for cp, forced_pos in position_overrides.items():
    kept_cat = indic_data.get(cp, defaults)[0]
    indic_data[cp] = (kept_cat, forced_pos, unicode_data[2][cp])
473
474
# values[i] counts how often each distinct value occurs in column i of
# indic_data; each column's default value starts at one.
values = [{default: 1} for default in defaults]
for row in indic_data.values():
    for tally, item in zip(values, row):
        tally[item] = tally.get(item, 0) + 1


# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {u: indic_data.pop(u) for u in ALLOWED_SINGLES}
488
# Banner comment: how the table was generated, then the first two lines of
# each input file.
for text in (
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt",
    " *",
    " * on files with these headers:",
    " *",
):
    print(text)
for file_header in headers:
    for header_line in file_header:
        print(" * %s" % (header_line.strip()))

# Close the banner, then emit the includes and the diagnostic pragmas.
for text in (
    " */",
    "",
    '#include "hb.hh"',
    "",
    '#ifndef HB_NO_OT_SHAPE',
    "",
    '#include "hb-ot-shaper-indic.hh"',
    "",
    '#pragma GCC diagnostic push',
    '#pragma GCC diagnostic ignored "-Wunused-macros"',
    "",
):
    print(text)
510print ()
511
# Print categories: one machine header per shaper first.
for shaper in categories:
    print('#include "hb-ot-shaper-%s-machine.hh"' % shaper)
print()

# Then one OT_<cat> macro per distinct category.  `done` remembers which
# shaper prefix defined each name; a name that shows up again under a later
# shaper gets a static_assert against the existing macro instead of a second
# definition.
done = {}
for shaper, shaper_cats in categories.items():
    print('/* %s */' % shaper)
    for cat in shaper_cats:
        v = shaper[0].upper()
        if cat in done:
            print('static_assert (OT_%s == %s_Cat(%s), "");' % (cat, v, cat))
            continue
        print("#define OT_%s %s_Cat(%s)" % (cat, v, cat))
        done[cat] = v
print()
527
# Shorten values: hand-picked aliases, short[0] for categories and short[1]
# for positions.
short = [
    {
        "Repha": "Rf",
        "PLACEHOLDER": "GB",
        "DOTTEDCIRCLE": "DC",
        "VPst": "VR",
        "VPre": "VL",
        "Robatic": "Rt",
        "Xgroup": "Xg",
        "Ygroup": "Yg",
        "As": "As",
    },
    {
        "END": "X",
        "BASE_C": "C",
        "ABOVE_C": "T",
        "BELOW_C": "B",
        "POST_C": "R",
        "PRE_C": "L",
        "PRE_M": "LM",
        "AFTER_MAIN": "A",
        "AFTER_SUB": "AS",
        "BEFORE_SUB": "BS",
        "AFTER_POST": "AP",
        "SMVD": "SM",
    },
]

# Add some of the values, to make them more readable, and to avoid duplicates:
# all_shorts[i] is the reverse lookup (alias -> full name) of short[i].
all_shorts = [{alias: name for name, alias in table.items()} for table in short]
560
what = ["OT", "POS"]
what_short = ["_OT", "_POS"]

# One (alias macro, long macro, occurrence count, original value) tuple per
# distinct category (i == 0) and per distinct position (i == 1).
cat_defs = []
for i, counts in enumerate(values[:2]):
    aliases = short[i]
    taken = all_shorts[i]
    for v in sorted(counts):
        if v not in aliases:
            # No hand-picked alias: use the capital letters of the name and
            # refuse to continue if that collides with an alias already taken.
            s = ''.join(c for c in v.replace('_And_', '_') if 'A' <= c <= 'Z')
            if s in taken:
                raise Exception("Duplicate short value alias", v, taken[s])
            taken[s] = v
            aliases[v] = s
        s = aliases[v]
        # Position names are upper-cased in the long macro; categories are not.
        long_name = v.upper() if i else v
        cat_defs.append((what_short[i] + '_' + s, what[i] + '_' + long_name, str(counts[v]), v))

# Column widths used to line the #define lines up.
maxlen_s = max(len(c[0]) for c in cat_defs)
maxlen_l = max(len(c[1]) for c in cat_defs)
maxlen_n = max(len(c[2]) for c in cat_defs)
for prefix in what_short:
    print()
    for alias, expansion, count, original in cat_defs:
        if prefix not in alias:
            continue
        print("#define %s %s /* %s chars; %s */" %
              (alias.ljust(maxlen_s), expansion.ljust(maxlen_l), count.rjust(maxlen_n), original))
print()
print('#pragma GCC diagnostic pop')
print()
print("#define INDIC_COMBINE_CATEGORIES(S,M) ((S) | ((M) << 8))")
print()
print("#define _(S,M) INDIC_COMBINE_CATEGORIES (%s_##S, %s_##M)" % tuple(what_short))
print()
print()
594
# Running totals maintained by print_block().
total = 0          # table slots emitted so far
used = 0           # how many of those slots had an entry in the data
last_block = None  # name of the most recent block that was given a heading
def print_block(block, start, end, data):
    """Print table cells for the code points start..end inclusive.

    block -- block name for the heading comment, or None for no heading.  The
             heading is printed only when the name differs from the previous
             one.
    start -- first code point; must be a multiple of 8.
    end   -- last code point; end + 1 must be a multiple of 8.
    data  -- mapping of code point to a tuple whose first two items are the
             category and the position; code points missing from it are
             printed using the module-level `defaults`.

    Every row of eight cells is preceded by a comment giving the code point
    of its first cell.  Updates the module-level total, used and last_block.
    """
    global total, used, last_block
    if block and block != last_block:
        print("\n\n  /* %s */" % block)
    assert start % 8 == 0
    assert (end+1) % 8 == 0
    num = 0
    for u in range(start, end + 1):
        if u % 8 == 0:
            print("\n  /* %04X */" % u, end="")
        if u in data:
            num += 1
            entry = data[u]
        else:
            entry = defaults
        cell = "_(%s,%s)," % (short[0][entry[0]], short[1][entry[1]])
        print(cell.rjust(9), end="")

    total += end - start + 1
    used += num
    if block:
        last_block = block
620
# Emit the table body.  Code points are visited in ascending order and grouped
# into runs aligned to rows of eight:
#   * a run starts at the row containing u and extends while the following
#     code points are present and belong to the same block;
#   * a gap of at most 32 code points since the previous run is padded with
#     default cells, so both runs share one contiguous range;
#   * a larger gap closes the current range and opens a new one, with an
#     indic_offset_0x... macro giving the new range's index into the table.
# starts[i] / ends[i] are the first code point of range i and one past its last.
uu = sorted(indic_data)

last = -100000
num = 0
offset = 0
starts = []
ends = []
print("static const uint16_t indic_table[] = {")
for u in uu:
    if u <= last:
        # Already covered by the run printed for an earlier code point.
        continue
    block = indic_data[u][2]

    start = u//8*8
    end = start+1
    # Membership is tested on the dict itself: uu holds exactly its keys, and
    # scanning that list here would cost O(n) per step.
    while end in indic_data and block == indic_data[end][2]:
        end += 1
    end = (end-1)//8*8 + 7

    if start != last + 1:
        if start - last <= 1+16*2:
            print_block(None, last+1, start-1, indic_data)
        else:
            if last >= 0:
                ends.append(last + 1)
                offset += ends[-1] - starts[-1]
            print()
            print()
            print("#define indic_offset_0x%04xu %d" % (start, offset))
            starts.append(start)

    print_block(block, start, end, indic_data)
    last = end
# Close the final range.
ends.append(last + 1)
offset += ends[-1] - starts[-1]
print()
print()
occupancy = used * 100. / total
page_bits = 12
print("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))

# Emit hb_indic_get_categories(): a switch on the code point's page
# (u >> page_bits) whose cases test the single code points and the table
# ranges that touch that page.
for text in (
    "",
    "uint16_t",
    "hb_indic_get_categories (hb_codepoint_t u)",
    "{",
    "  switch (u >> %d)" % page_bits,
    "  {",
):
    print(text)
pages = {cp >> page_bits for cp in starts + ends + list(singles)}
for p in sorted(pages):
    print("    case 0x%0Xu:" % p)
    for u, d in singles.items():
        if u >> page_bits == p:
            print("      if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
    for start, end in zip(starts, ends):
        if p == start >> page_bits or p == end >> page_bits:
            offset_macro = "indic_offset_0x%04xu" % start
            print("      if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset_macro))
    print("      break;")
    print("")
for text in (
    "    default:",
    "      break;",
    "  }",
    "  return _(X,X);",
    "}",
    "",
    "#undef _",
    "#undef INDIC_COMBINE_CATEGORIES",
):
    print(text)

# Undefine every short alias macro again, categories first, then positions.
for i in range(2):
    print()
    for v in sorted(values[i]):
        print("#undef %s_%s" % (what_short[i], short[i][v]))
print()
print('#endif')
print()
print("/* == End of generated table == */")

# Maintain at least 50% occupancy in the table
if occupancy < 50:
    raise Exception("Table too sparse, please investigate: ", occupancy)