xref: /aosp_15_r20/external/brotli/c/common/context.h (revision f4ee7fba7774faf2a30f13154332c0a06550dbc4)
/* Copyright 2013 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
6*f4ee7fbaSAndroid Build Coastguard Worker 
/* Lookup table to map the previous two bytes to a context id.

  There are four different context modeling modes defined here:
    CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
    CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
    CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
    CONTEXT_SIGNED: second-order context model tuned for signed integers.

  If |p1| and |p2| are the previous two bytes, and |mode| is the current
  context mode, we calculate the context as:

    context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].

  For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
  (i.e. < 128), this will be equivalent to

    context = 4 * context1(p1) + context2(p2),

  where context1 is based on the previous byte in the following way:

    0  : non-ASCII control
    1  : \t, \n, \r
    2  : space
    3  : other punctuation
    4  : " '
    5  : %
    6  : ( < [ {
    7  : ) > ] }
    8  : , ; :
    9  : .
    10 : =
    11 : number
    12 : upper-case vowel
    13 : upper-case consonant
    14 : lower-case vowel
    15 : lower-case consonant

  and context2 is based on the second last byte:

    0 : control, space
    1 : punctuation
    2 : upper-case letter, number
    3 : lower-case letter

  If the last byte is ASCII, and the second last byte is not (in a valid UTF8
  stream it will be a continuation byte, value between 128 and 191), the
  context is the same as if the second last byte was an ASCII control or space.

  If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
  be a continuation byte and the context id is 2 or 3 depending on the LSB of
  the last byte and to a lesser extent on the second last byte if it is ASCII.

  If the last byte is a UTF8 continuation byte, the second last byte can be:
    - continuation byte: the next byte is probably ASCII or lead byte (assuming
      4-byte UTF8 characters are rare) and the context id is 0 or 1.
    - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
    - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3

  The possible value combinations of the previous two bytes, the range of
  context ids and the type of the next byte are summarized in the table below:

  |--------\-----------------------------------------------------------------|
  |         \                         Last byte                              |
  | Second   \---------------------------------------------------------------|
  | last byte \    ASCII            |   cont. byte        |   lead byte      |
  |            \   (0-127)          |   (128-191)         |   (192-)         |
  |=============|===================|=====================|==================|
  |  ASCII      | next: ASCII/lead  |  not valid          |  next: cont.     |
  |  (0-127)    | context: 4 - 63   |                     |  context: 2 - 3  |
  |-------------|-------------------|---------------------|------------------|
  |  cont. byte | next: ASCII/lead  |  next: ASCII/lead   |  next: cont.     |
  |  (128-191)  | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
  |-------------|-------------------|---------------------|------------------|
  |  lead byte  | not valid         |  next: ASCII/lead   |  not valid       |
  |  (192-207)  |                   |  context: 0 - 1     |                  |
  |-------------|-------------------|---------------------|------------------|
  |  lead byte  | not valid         |  next: cont.        |  not valid       |
  |  (208-)     |                   |  context: 2 - 3     |                  |
  |-------------|-------------------|---------------------|------------------|
*/
87*f4ee7fbaSAndroid Build Coastguard Worker 
88*f4ee7fbaSAndroid Build Coastguard Worker #ifndef BROTLI_COMMON_CONTEXT_H_
89*f4ee7fbaSAndroid Build Coastguard Worker #define BROTLI_COMMON_CONTEXT_H_
90*f4ee7fbaSAndroid Build Coastguard Worker 
91*f4ee7fbaSAndroid Build Coastguard Worker #include <brotli/port.h>
92*f4ee7fbaSAndroid Build Coastguard Worker #include <brotli/types.h>
93*f4ee7fbaSAndroid Build Coastguard Worker 
/* Context modeling mode. The numeric values are fixed explicitly because
   BROTLI_CONTEXT_LUT uses the mode to compute an offset into the shared
   lookup table. */
enum ContextType {
  /* Context id is the least significant 6 bits of the last byte. */
  CONTEXT_LSB6 = 0,
  /* Context id is the most significant 6 bits of the last byte. */
  CONTEXT_MSB6 = 1,
  /* Second-order context model tuned for UTF8-encoded text. */
  CONTEXT_UTF8 = 2,
  /* Second-order context model tuned for signed integers. */
  CONTEXT_SIGNED = 3
};
typedef enum ContextType ContextType;
100*f4ee7fbaSAndroid Build Coastguard Worker 
101*f4ee7fbaSAndroid Build Coastguard Worker /* "Soft-private", it is exported, but not "advertised" as API. */
102*f4ee7fbaSAndroid Build Coastguard Worker /* Common context lookup table for all context modes. */
103*f4ee7fbaSAndroid Build Coastguard Worker BROTLI_COMMON_API extern const uint8_t _kBrotliContextLookupTable[2048];
104*f4ee7fbaSAndroid Build Coastguard Worker 
/* Read-only view of the lookup table bytes for one context mode. */
typedef uint8_t const *ContextLut;
106*f4ee7fbaSAndroid Build Coastguard Worker 
/* Returns the ContextLut for MODE (typeof(MODE) == ContextType): a pointer
   to the slice of the common table starting at byte offset MODE * 512. */
#define BROTLI_CONTEXT_LUT(MODE) \
  (_kBrotliContextLookupTable + ((MODE) << 9))
109*f4ee7fbaSAndroid Build Coastguard Worker 
/* Computes the context id from the last byte P1 and the second last byte P2
   using LUT (typeof(LUT) == ContextLut): entry P1 of the first 256 bytes is
   OR'ed with entry P2 of the following 256 bytes.
   Note: LUT is evaluated twice. */
#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[(P1)] | (LUT)[256 + (P2)])
112*f4ee7fbaSAndroid Build Coastguard Worker 
113*f4ee7fbaSAndroid Build Coastguard Worker #endif  /* BROTLI_COMMON_CONTEXT_H_ */
114