1*f4ee7fbaSAndroid Build Coastguard Worker /* Copyright 2013 Google Inc. All Rights Reserved. 2*f4ee7fbaSAndroid Build Coastguard Worker 3*f4ee7fbaSAndroid Build Coastguard Worker Distributed under MIT license. 4*f4ee7fbaSAndroid Build Coastguard Worker See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5*f4ee7fbaSAndroid Build Coastguard Worker */ 6*f4ee7fbaSAndroid Build Coastguard Worker 7*f4ee7fbaSAndroid Build Coastguard Worker /* Lookup table to map the previous two bytes to a context id. 8*f4ee7fbaSAndroid Build Coastguard Worker 9*f4ee7fbaSAndroid Build Coastguard Worker There are four different context modeling modes defined here: 10*f4ee7fbaSAndroid Build Coastguard Worker CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, 11*f4ee7fbaSAndroid Build Coastguard Worker CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, 12*f4ee7fbaSAndroid Build Coastguard Worker CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, 13*f4ee7fbaSAndroid Build Coastguard Worker CONTEXT_SIGNED: second-order context model tuned for signed integers. 14*f4ee7fbaSAndroid Build Coastguard Worker 15*f4ee7fbaSAndroid Build Coastguard Worker If |p1| and |p2| are the previous two bytes, and |mode| is current context 16*f4ee7fbaSAndroid Build Coastguard Worker mode, we calculate the context as: 17*f4ee7fbaSAndroid Build Coastguard Worker 18*f4ee7fbaSAndroid Build Coastguard Worker context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256]. 19*f4ee7fbaSAndroid Build Coastguard Worker 20*f4ee7fbaSAndroid Build Coastguard Worker For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters 21*f4ee7fbaSAndroid Build Coastguard Worker (i.e. < 128), this will be equivalent to 22*f4ee7fbaSAndroid Build Coastguard Worker 23*f4ee7fbaSAndroid Build Coastguard Worker context = 4 * context1(p1) + context2(p2), 24*f4ee7fbaSAndroid Build Coastguard Worker 25*f4ee7fbaSAndroid Build Coastguard Worker where context1 is based on the previous byte in the following way: 26*f4ee7fbaSAndroid Build Coastguard Worker 27*f4ee7fbaSAndroid Build Coastguard Worker 0 : non-ASCII control 28*f4ee7fbaSAndroid Build Coastguard Worker 1 : \t, \n, \r 29*f4ee7fbaSAndroid Build Coastguard Worker 2 : space 30*f4ee7fbaSAndroid Build Coastguard Worker 3 : other punctuation 31*f4ee7fbaSAndroid Build Coastguard Worker 4 : " ' 32*f4ee7fbaSAndroid Build Coastguard Worker 5 : % 33*f4ee7fbaSAndroid Build Coastguard Worker 6 : ( < [ { 34*f4ee7fbaSAndroid Build Coastguard Worker 7 : ) > ] } 35*f4ee7fbaSAndroid Build Coastguard Worker 8 : , ; : 36*f4ee7fbaSAndroid Build Coastguard Worker 9 : . 37*f4ee7fbaSAndroid Build Coastguard Worker 10 : = 38*f4ee7fbaSAndroid Build Coastguard Worker 11 : number 39*f4ee7fbaSAndroid Build Coastguard Worker 12 : upper-case vowel 40*f4ee7fbaSAndroid Build Coastguard Worker 13 : upper-case consonant 41*f4ee7fbaSAndroid Build Coastguard Worker 14 : lower-case vowel 42*f4ee7fbaSAndroid Build Coastguard Worker 15 : lower-case consonant 43*f4ee7fbaSAndroid Build Coastguard Worker 44*f4ee7fbaSAndroid Build Coastguard Worker and context2 is based on the second last byte: 45*f4ee7fbaSAndroid Build Coastguard Worker 46*f4ee7fbaSAndroid Build Coastguard Worker 0 : control, space 47*f4ee7fbaSAndroid Build Coastguard Worker 1 : punctuation 48*f4ee7fbaSAndroid Build Coastguard Worker 2 : upper-case letter, number 49*f4ee7fbaSAndroid Build Coastguard Worker 3 : lower-case letter 50*f4ee7fbaSAndroid Build Coastguard Worker 51*f4ee7fbaSAndroid Build Coastguard Worker If the last byte is ASCII, and the second last byte is not (in a valid UTF8 52*f4ee7fbaSAndroid Build Coastguard Worker stream it will be a continuation byte, value between 128 and 191), the 53*f4ee7fbaSAndroid Build Coastguard Worker context is the same as if the second last byte was an ASCII control or space. 54*f4ee7fbaSAndroid Build Coastguard Worker 55*f4ee7fbaSAndroid Build Coastguard Worker If the last byte is a UTF8 lead byte (value >= 192), then the next byte will 56*f4ee7fbaSAndroid Build Coastguard Worker be a continuation byte and the context id is 2 or 3 depending on the LSB of 57*f4ee7fbaSAndroid Build Coastguard Worker the last byte and to a lesser extent on the second last byte if it is ASCII. 58*f4ee7fbaSAndroid Build Coastguard Worker 59*f4ee7fbaSAndroid Build Coastguard Worker If the last byte is a UTF8 continuation byte, the second last byte can be: 60*f4ee7fbaSAndroid Build Coastguard Worker - continuation byte: the next byte is probably ASCII or lead byte (assuming 61*f4ee7fbaSAndroid Build Coastguard Worker 4-byte UTF8 characters are rare) and the context id is 0 or 1. 62*f4ee7fbaSAndroid Build Coastguard Worker - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 63*f4ee7fbaSAndroid Build Coastguard Worker - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 64*f4ee7fbaSAndroid Build Coastguard Worker 65*f4ee7fbaSAndroid Build Coastguard Worker The possible value combinations of the previous two bytes, the range of 66*f4ee7fbaSAndroid Build Coastguard Worker context ids and the type of the next byte is summarized in the table below: 67*f4ee7fbaSAndroid Build Coastguard Worker 68*f4ee7fbaSAndroid Build Coastguard Worker |--------\-----------------------------------------------------------------| 69*f4ee7fbaSAndroid Build Coastguard Worker | \ Last byte | 70*f4ee7fbaSAndroid Build Coastguard Worker | Second \---------------------------------------------------------------| 71*f4ee7fbaSAndroid Build Coastguard Worker | last byte \ ASCII | cont. byte | lead byte | 72*f4ee7fbaSAndroid Build Coastguard Worker | \ (0-127) | (128-191) | (192-) | 73*f4ee7fbaSAndroid Build Coastguard Worker |=============|===================|=====================|==================| 74*f4ee7fbaSAndroid Build Coastguard Worker | ASCII | next: ASCII/lead | not valid | next: cont. | 75*f4ee7fbaSAndroid Build Coastguard Worker | (0-127) | context: 4 - 63 | | context: 2 - 3 | 76*f4ee7fbaSAndroid Build Coastguard Worker |-------------|-------------------|---------------------|------------------| 77*f4ee7fbaSAndroid Build Coastguard Worker | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | 78*f4ee7fbaSAndroid Build Coastguard Worker | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | 79*f4ee7fbaSAndroid Build Coastguard Worker |-------------|-------------------|---------------------|------------------| 80*f4ee7fbaSAndroid Build Coastguard Worker | lead byte | not valid | next: ASCII/lead | not valid | 81*f4ee7fbaSAndroid Build Coastguard Worker | (192-207) | | context: 0 - 1 | | 82*f4ee7fbaSAndroid Build Coastguard Worker |-------------|-------------------|---------------------|------------------| 83*f4ee7fbaSAndroid Build Coastguard Worker | lead byte | not valid | next: cont. | not valid | 84*f4ee7fbaSAndroid Build Coastguard Worker | (208-) | | context: 2 - 3 | | 85*f4ee7fbaSAndroid Build Coastguard Worker |-------------|-------------------|---------------------|------------------| 86*f4ee7fbaSAndroid Build Coastguard Worker */ 87*f4ee7fbaSAndroid Build Coastguard Worker 88*f4ee7fbaSAndroid Build Coastguard Worker #ifndef BROTLI_COMMON_CONTEXT_H_ 89*f4ee7fbaSAndroid Build Coastguard Worker #define BROTLI_COMMON_CONTEXT_H_ 90*f4ee7fbaSAndroid Build Coastguard Worker 91*f4ee7fbaSAndroid Build Coastguard Worker #include <brotli/port.h> 92*f4ee7fbaSAndroid Build Coastguard Worker #include <brotli/types.h> 93*f4ee7fbaSAndroid Build Coastguard Worker 94*f4ee7fbaSAndroid Build Coastguard Worker typedef enum ContextType { 95*f4ee7fbaSAndroid Build Coastguard Worker CONTEXT_LSB6 = 0, 96*f4ee7fbaSAndroid Build Coastguard Worker CONTEXT_MSB6 = 1, 97*f4ee7fbaSAndroid Build Coastguard Worker CONTEXT_UTF8 = 2, 98*f4ee7fbaSAndroid Build Coastguard Worker CONTEXT_SIGNED = 3 99*f4ee7fbaSAndroid Build Coastguard Worker } ContextType; 100*f4ee7fbaSAndroid Build Coastguard Worker 101*f4ee7fbaSAndroid Build Coastguard Worker /* "Soft-private", it is exported, but not "advertised" as API. */ 102*f4ee7fbaSAndroid Build Coastguard Worker /* Common context lookup table for all context modes. */ 103*f4ee7fbaSAndroid Build Coastguard Worker BROTLI_COMMON_API extern const uint8_t _kBrotliContextLookupTable[2048]; 104*f4ee7fbaSAndroid Build Coastguard Worker 105*f4ee7fbaSAndroid Build Coastguard Worker typedef const uint8_t* ContextLut; 106*f4ee7fbaSAndroid Build Coastguard Worker 107*f4ee7fbaSAndroid Build Coastguard Worker /* typeof(MODE) == ContextType; returns ContextLut */ 108*f4ee7fbaSAndroid Build Coastguard Worker #define BROTLI_CONTEXT_LUT(MODE) (&_kBrotliContextLookupTable[(MODE) << 9]) 109*f4ee7fbaSAndroid Build Coastguard Worker 110*f4ee7fbaSAndroid Build Coastguard Worker /* typeof(LUT) == ContextLut */ 111*f4ee7fbaSAndroid Build Coastguard Worker #define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2]) 112*f4ee7fbaSAndroid Build Coastguard Worker 113*f4ee7fbaSAndroid Build Coastguard Worker #endif /* BROTLI_COMMON_CONTEXT_H_ */ 114