xref: /aosp_15_r20/external/lzma/C/Bcj2Enc.c (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1*f6dc9357SAndroid Build Coastguard Worker /* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2)
2*f6dc9357SAndroid Build Coastguard Worker 2023-04-02 : Igor Pavlov : Public domain */
3*f6dc9357SAndroid Build Coastguard Worker 
4*f6dc9357SAndroid Build Coastguard Worker #include "Precomp.h"
5*f6dc9357SAndroid Build Coastguard Worker 
/* Uncomment the next line to print a trace line (ip, tempPos and the number
   of remaining source bytes) at every PRF2() call site. */
/* #define SHOW_STAT */
#if defined(SHOW_STAT)
  #include <stdio.h>
  /* The trailing ';' is part of the macro: call sites use PRF2(...) without one. */
  #define PRF2(s) printf("%s ip=%8x  tempPos=%d  src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src));
#else
  /* Tracing disabled: PRF2() expands to nothing. */
  #define PRF2(s)
#endif
13*f6dc9357SAndroid Build Coastguard Worker 
14*f6dc9357SAndroid Build Coastguard Worker #include "Bcj2.h"
15*f6dc9357SAndroid Build Coastguard Worker #include "CpuArch.h"
16*f6dc9357SAndroid Build Coastguard Worker 
/* Range coder constants used by this file. */

/* Shift applied when a probability is adapted after each coded bit. */
#define kNumMoveBits 5

/* Probabilities are kNumBitModelTotalBits-bit fractions of kBitModelTotal;
   every model starts at (kBitModelTotal >> 1). */
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)

/* When (range) drops below this value, one byte is shifted out of (low)
   and (range) is multiplied by 256. */
#define kTopValue ((UInt32)1 << 24)
21*f6dc9357SAndroid Build Coastguard Worker 
/*
  Bcj2Enc_Init() resets the encoder object (p) to its start-of-stream state.
  It does not touch the buffer pointers (src / srcLim / bufs[] / lims[]).
*/
void Bcj2Enc_Init(CBcj2Enc *p)
{
  unsigned k;

  /* every probability model starts at one half */
  for (k = (unsigned)(sizeof(p->probs) / sizeof(p->probs[0])); k != 0; k--)
    p->probs[k - 1] = kBitModelTotal >> 1;

  /* range coder */
  p->low = 0;
  p->range = 0xffffffff;
  p->cache = 0;
  p->cacheSize = 1;

  /* position tracking and conversion limits */
  p->ip64 = 0;
  p->fileIp64 = 0;
  p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED;
  p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT;

  /* state machine */
  p->state = BCJ2_ENC_STATE_ORIG;
  p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
  p->context = 0;
  p->tempPos = 0;

  /* final flush: Bcj2Enc_Encode_2() calls the shift-out helper flushRem times */
  p->isFlushState = 0;
  p->flushRem = 5;
}
43*f6dc9357SAndroid Build Coastguard Worker 
44*f6dc9357SAndroid Build Coastguard Worker // Z7_NO_INLINE
45*f6dc9357SAndroid Build Coastguard Worker Z7_FORCE_INLINE
Bcj2_RangeEnc_ShiftLow(CBcj2Enc * p)46*f6dc9357SAndroid Build Coastguard Worker static BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p)
47*f6dc9357SAndroid Build Coastguard Worker {
48*f6dc9357SAndroid Build Coastguard Worker   const UInt32 low = (UInt32)p->low;
49*f6dc9357SAndroid Build Coastguard Worker   const unsigned high = (unsigned)
50*f6dc9357SAndroid Build Coastguard Worker     #if defined(Z7_MSC_VER_ORIGINAL) \
51*f6dc9357SAndroid Build Coastguard Worker         && defined(MY_CPU_X86) \
52*f6dc9357SAndroid Build Coastguard Worker         && defined(MY_CPU_LE) \
53*f6dc9357SAndroid Build Coastguard Worker         && !defined(MY_CPU_64BIT)
54*f6dc9357SAndroid Build Coastguard Worker       // we try to rid of __aullshr() call in MSVS-x86
55*f6dc9357SAndroid Build Coastguard Worker       (((const UInt32 *)&p->low)[1]); // [1] : for little-endian only
56*f6dc9357SAndroid Build Coastguard Worker     #else
57*f6dc9357SAndroid Build Coastguard Worker       (p->low >> 32);
58*f6dc9357SAndroid Build Coastguard Worker     #endif
59*f6dc9357SAndroid Build Coastguard Worker   if (low < (UInt32)0xff000000 || high != 0)
60*f6dc9357SAndroid Build Coastguard Worker   {
61*f6dc9357SAndroid Build Coastguard Worker     Byte *buf = p->bufs[BCJ2_STREAM_RC];
62*f6dc9357SAndroid Build Coastguard Worker     do
63*f6dc9357SAndroid Build Coastguard Worker     {
64*f6dc9357SAndroid Build Coastguard Worker       if (buf == p->lims[BCJ2_STREAM_RC])
65*f6dc9357SAndroid Build Coastguard Worker       {
66*f6dc9357SAndroid Build Coastguard Worker         p->state = BCJ2_STREAM_RC;
67*f6dc9357SAndroid Build Coastguard Worker         p->bufs[BCJ2_STREAM_RC] = buf;
68*f6dc9357SAndroid Build Coastguard Worker         return True;
69*f6dc9357SAndroid Build Coastguard Worker       }
70*f6dc9357SAndroid Build Coastguard Worker       *buf++ = (Byte)(p->cache + high);
71*f6dc9357SAndroid Build Coastguard Worker       p->cache = 0xff;
72*f6dc9357SAndroid Build Coastguard Worker     }
73*f6dc9357SAndroid Build Coastguard Worker     while (--p->cacheSize);
74*f6dc9357SAndroid Build Coastguard Worker     p->bufs[BCJ2_STREAM_RC] = buf;
75*f6dc9357SAndroid Build Coastguard Worker     p->cache = (Byte)(low >> 24);
76*f6dc9357SAndroid Build Coastguard Worker   }
77*f6dc9357SAndroid Build Coastguard Worker   p->cacheSize++;
78*f6dc9357SAndroid Build Coastguard Worker   p->low = low << 8;
79*f6dc9357SAndroid Build Coastguard Worker   return False;
80*f6dc9357SAndroid Build Coastguard Worker }
81*f6dc9357SAndroid Build Coastguard Worker 
82*f6dc9357SAndroid Build Coastguard Worker 
/*
We can use 2 alternative versions of code:
1) non-marker version:
  Byte CBcj2Enc::context
  Byte temp[8];
  Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer.
  Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction
  with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call.

2) marker version:
  UInt32 CBcj2Enc::context
  Byte CBcj2Enc::temp[4];
  MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains the marker that was found.
  It's allowed that
    one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest,
    and another call of Bcj2Enc_Encode_2() does offset conversion.
    So different values of (fileIp) and (fileSize) are possible
    in these different Bcj2Enc_Encode_2() calls.

Also marker version requires additional if((v & MARKER_FLAG) == 0) check in main loop.
So we use non-marker version.
*/
105*f6dc9357SAndroid Build Coastguard Worker 
/*
  Corner cases with overlap in multi-block.
  before v23: there was one corner case, where converted instruction
    could start in one sub-stream and finish in next sub-stream.
  If multi-block (solid) encoding is used,
    and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream,
    and (0f) is last byte of previous sub-stream
    and (8x) is first byte of current sub-stream
  then (0f 8x) pair is treated as marker by BCJ2 encoder and decoder.
  BCJ2 encoder can convert 32-bit offset for that (0f 8x) pair,
  if that offset meets limit requirements.
  If encoder allows 32-bit offset conversion for such overlap case,
  then the data in 3 uncompressed BCJ2 streams for some sub-stream
  can depend on data of previous sub-stream.
  That corner case is not a big problem, and it's a rare case.
  Since v23.00 we do additional check to prevent conversions in such overlap cases.
*/
123*f6dc9357SAndroid Build Coastguard Worker 
/*
  Bcj2Enc_Encode_2() output variables at exit:
  {
    if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG))
    {
      it means that encoder needs more input data.
      if (p->srcLim == p->src) at exit, then
      {
        (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
        all input data were read and processed, and we are ready for
        new input data.
      }
      else
      {
        (p->srcLim != p->src)
        (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
          The encoder has found e8/e9/0f_8x marker,
          and p->src points to last byte of that marker,
          Bcj2Enc_Encode_2() needs more input data to get a total of
          5 bytes (last byte of marker and 32-bit branch offset)
          as continuous array starting from p->src.
        (p->srcLim - p->src < 5) requirement is met after exit.
          So non-processed residue from p->src to p->srcLim is always less than 5 bytes.
      }
    }
  }
*/
151*f6dc9357SAndroid Build Coastguard Worker 
152*f6dc9357SAndroid Build Coastguard Worker Z7_NO_INLINE
Bcj2Enc_Encode_2(CBcj2Enc * p)153*f6dc9357SAndroid Build Coastguard Worker static void Bcj2Enc_Encode_2(CBcj2Enc *p)
154*f6dc9357SAndroid Build Coastguard Worker {
155*f6dc9357SAndroid Build Coastguard Worker   if (!p->isFlushState)
156*f6dc9357SAndroid Build Coastguard Worker   {
157*f6dc9357SAndroid Build Coastguard Worker     const Byte *src;
158*f6dc9357SAndroid Build Coastguard Worker     UInt32 v;
159*f6dc9357SAndroid Build Coastguard Worker     {
160*f6dc9357SAndroid Build Coastguard Worker       const unsigned state = p->state;
161*f6dc9357SAndroid Build Coastguard Worker       if (BCJ2_IS_32BIT_STREAM(state))
162*f6dc9357SAndroid Build Coastguard Worker       {
163*f6dc9357SAndroid Build Coastguard Worker         Byte *cur = p->bufs[state];
164*f6dc9357SAndroid Build Coastguard Worker         if (cur == p->lims[state])
165*f6dc9357SAndroid Build Coastguard Worker           return;
166*f6dc9357SAndroid Build Coastguard Worker         SetBe32a(cur, p->tempTarget)
167*f6dc9357SAndroid Build Coastguard Worker         p->bufs[state] = cur + 4;
168*f6dc9357SAndroid Build Coastguard Worker       }
169*f6dc9357SAndroid Build Coastguard Worker     }
170*f6dc9357SAndroid Build Coastguard Worker     p->state = BCJ2_ENC_STATE_ORIG; // for main reason of exit
171*f6dc9357SAndroid Build Coastguard Worker     src = p->src;
172*f6dc9357SAndroid Build Coastguard Worker     v = p->context;
173*f6dc9357SAndroid Build Coastguard Worker 
174*f6dc9357SAndroid Build Coastguard Worker     // #define WRITE_CONTEXT  p->context = v; // for marker version
175*f6dc9357SAndroid Build Coastguard Worker     #define WRITE_CONTEXT           p->context = (Byte)v;
176*f6dc9357SAndroid Build Coastguard Worker     #define WRITE_CONTEXT_AND_SRC   p->src = src;  WRITE_CONTEXT
177*f6dc9357SAndroid Build Coastguard Worker 
178*f6dc9357SAndroid Build Coastguard Worker     for (;;)
179*f6dc9357SAndroid Build Coastguard Worker     {
180*f6dc9357SAndroid Build Coastguard Worker       // const Byte *src;
181*f6dc9357SAndroid Build Coastguard Worker       // UInt32 v;
182*f6dc9357SAndroid Build Coastguard Worker       CBcj2Enc_ip_unsigned ip;
183*f6dc9357SAndroid Build Coastguard Worker       if (p->range < kTopValue)
184*f6dc9357SAndroid Build Coastguard Worker       {
185*f6dc9357SAndroid Build Coastguard Worker         // to reduce register pressure and code size: we save and restore local variables.
186*f6dc9357SAndroid Build Coastguard Worker         WRITE_CONTEXT_AND_SRC
187*f6dc9357SAndroid Build Coastguard Worker         if (Bcj2_RangeEnc_ShiftLow(p))
188*f6dc9357SAndroid Build Coastguard Worker           return;
189*f6dc9357SAndroid Build Coastguard Worker         p->range <<= 8;
190*f6dc9357SAndroid Build Coastguard Worker         src = p->src;
191*f6dc9357SAndroid Build Coastguard Worker         v = p->context;
192*f6dc9357SAndroid Build Coastguard Worker       }
193*f6dc9357SAndroid Build Coastguard Worker       // src = p->src;
194*f6dc9357SAndroid Build Coastguard Worker       // #define MARKER_FLAG  ((UInt32)1 << 17)
195*f6dc9357SAndroid Build Coastguard Worker       // if ((v & MARKER_FLAG) == 0) // for marker version
196*f6dc9357SAndroid Build Coastguard Worker       {
197*f6dc9357SAndroid Build Coastguard Worker         const Byte *srcLim;
198*f6dc9357SAndroid Build Coastguard Worker         Byte *dest = p->bufs[BCJ2_STREAM_MAIN];
199*f6dc9357SAndroid Build Coastguard Worker         {
200*f6dc9357SAndroid Build Coastguard Worker           const SizeT remSrc = (SizeT)(p->srcLim - src);
201*f6dc9357SAndroid Build Coastguard Worker           SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
202*f6dc9357SAndroid Build Coastguard Worker           if (rem >= remSrc)
203*f6dc9357SAndroid Build Coastguard Worker             rem = remSrc;
204*f6dc9357SAndroid Build Coastguard Worker           srcLim = src + rem;
205*f6dc9357SAndroid Build Coastguard Worker         }
206*f6dc9357SAndroid Build Coastguard Worker         /* p->context contains context of previous byte:
207*f6dc9357SAndroid Build Coastguard Worker            bits [0 : 7]  : src[-1], if (src) was changed in this call
208*f6dc9357SAndroid Build Coastguard Worker            bits [8 : 31] : are undefined for non-marker version
209*f6dc9357SAndroid Build Coastguard Worker         */
210*f6dc9357SAndroid Build Coastguard Worker         // v = p->context;
211*f6dc9357SAndroid Build Coastguard Worker         #define NUM_SHIFT_BITS  24
212*f6dc9357SAndroid Build Coastguard Worker         #define CONV_FLAG  ((UInt32)1 << 16)
213*f6dc9357SAndroid Build Coastguard Worker         #define ONE_ITER { \
214*f6dc9357SAndroid Build Coastguard Worker           b = src[0]; \
215*f6dc9357SAndroid Build Coastguard Worker           *dest++ = (Byte)b; \
216*f6dc9357SAndroid Build Coastguard Worker           v = (v << NUM_SHIFT_BITS) | b; \
217*f6dc9357SAndroid Build Coastguard Worker           if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
218*f6dc9357SAndroid Build Coastguard Worker           if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
219*f6dc9357SAndroid Build Coastguard Worker               ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
220*f6dc9357SAndroid Build Coastguard Worker           src++; if (src == srcLim) { break; } }
221*f6dc9357SAndroid Build Coastguard Worker 
222*f6dc9357SAndroid Build Coastguard Worker         if (src != srcLim)
223*f6dc9357SAndroid Build Coastguard Worker         for (;;)
224*f6dc9357SAndroid Build Coastguard Worker         {
225*f6dc9357SAndroid Build Coastguard Worker           /* clang can generate ineffective code with setne instead of two jcc instructions.
226*f6dc9357SAndroid Build Coastguard Worker              we can use 2 iterations and external (unsigned b) to avoid that ineffective code genaration. */
227*f6dc9357SAndroid Build Coastguard Worker           unsigned b;
228*f6dc9357SAndroid Build Coastguard Worker           ONE_ITER
229*f6dc9357SAndroid Build Coastguard Worker           ONE_ITER
230*f6dc9357SAndroid Build Coastguard Worker         }
231*f6dc9357SAndroid Build Coastguard Worker 
232*f6dc9357SAndroid Build Coastguard Worker         ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]);
233*f6dc9357SAndroid Build Coastguard Worker         p->bufs[BCJ2_STREAM_MAIN] = dest;
234*f6dc9357SAndroid Build Coastguard Worker         p->ip64 = ip;
235*f6dc9357SAndroid Build Coastguard Worker 
236*f6dc9357SAndroid Build Coastguard Worker         if (src == srcLim)
237*f6dc9357SAndroid Build Coastguard Worker         {
238*f6dc9357SAndroid Build Coastguard Worker           WRITE_CONTEXT_AND_SRC
239*f6dc9357SAndroid Build Coastguard Worker           if (src != p->srcLim)
240*f6dc9357SAndroid Build Coastguard Worker           {
241*f6dc9357SAndroid Build Coastguard Worker             p->state = BCJ2_STREAM_MAIN;
242*f6dc9357SAndroid Build Coastguard Worker             return;
243*f6dc9357SAndroid Build Coastguard Worker           }
244*f6dc9357SAndroid Build Coastguard Worker           /* (p->src == p->srcLim)
245*f6dc9357SAndroid Build Coastguard Worker           (p->state == BCJ2_ENC_STATE_ORIG) */
246*f6dc9357SAndroid Build Coastguard Worker           if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
247*f6dc9357SAndroid Build Coastguard Worker             return;
248*f6dc9357SAndroid Build Coastguard Worker           /* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM */
249*f6dc9357SAndroid Build Coastguard Worker           // (p->flushRem == 5);
250*f6dc9357SAndroid Build Coastguard Worker           p->isFlushState = 1;
251*f6dc9357SAndroid Build Coastguard Worker           break;
252*f6dc9357SAndroid Build Coastguard Worker         }
253*f6dc9357SAndroid Build Coastguard Worker         src++;
254*f6dc9357SAndroid Build Coastguard Worker         // p->src = src;
255*f6dc9357SAndroid Build Coastguard Worker       }
256*f6dc9357SAndroid Build Coastguard Worker       // ip = p->ip; // for marker version
257*f6dc9357SAndroid Build Coastguard Worker       /* marker was found */
258*f6dc9357SAndroid Build Coastguard Worker       /* (v) contains marker that was found:
259*f6dc9357SAndroid Build Coastguard Worker            bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7]
260*f6dc9357SAndroid Build Coastguard Worker                          : value of src[-2] : xx/xx/0f
261*f6dc9357SAndroid Build Coastguard Worker            bits [0 : 7]  : value of src[-1] : e8/e9/8x
262*f6dc9357SAndroid Build Coastguard Worker       */
263*f6dc9357SAndroid Build Coastguard Worker       {
264*f6dc9357SAndroid Build Coastguard Worker         {
265*f6dc9357SAndroid Build Coastguard Worker         #if NUM_SHIFT_BITS != 24
266*f6dc9357SAndroid Build Coastguard Worker           v &= ~(UInt32)CONV_FLAG;
267*f6dc9357SAndroid Build Coastguard Worker         #endif
268*f6dc9357SAndroid Build Coastguard Worker           // UInt32 relat = 0;
269*f6dc9357SAndroid Build Coastguard Worker           if ((SizeT)(p->srcLim - src) >= 4)
270*f6dc9357SAndroid Build Coastguard Worker           {
271*f6dc9357SAndroid Build Coastguard Worker             /*
272*f6dc9357SAndroid Build Coastguard Worker             if (relat != 0 || (Byte)v != 0xe8)
273*f6dc9357SAndroid Build Coastguard Worker             BoolInt isBigOffset = True;
274*f6dc9357SAndroid Build Coastguard Worker             */
275*f6dc9357SAndroid Build Coastguard Worker             const UInt32 relat = GetUi32(src);
276*f6dc9357SAndroid Build Coastguard Worker             /*
277*f6dc9357SAndroid Build Coastguard Worker             #define EXCLUDE_FLAG  ((UInt32)1 << 4)
278*f6dc9357SAndroid Build Coastguard Worker             #define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0)
279*f6dc9357SAndroid Build Coastguard Worker             if (p->relatExcludeBits != 0)
280*f6dc9357SAndroid Build Coastguard Worker             {
281*f6dc9357SAndroid Build Coastguard Worker               const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1);
282*f6dc9357SAndroid Build Coastguard Worker               isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0);
283*f6dc9357SAndroid Build Coastguard Worker             }
284*f6dc9357SAndroid Build Coastguard Worker             // isBigOffset = False; // for debug
285*f6dc9357SAndroid Build Coastguard Worker             */
286*f6dc9357SAndroid Build Coastguard Worker             ip -= p->fileIp64;
287*f6dc9357SAndroid Build Coastguard Worker             // Use the following if check, if (ip) is 64-bit:
288*f6dc9357SAndroid Build Coastguard Worker             if (ip > (((v + 0x20) >> 5) & 1))  // 23.00 : we eliminate milti-block overlap for (Of 80) and (e8/e9)
289*f6dc9357SAndroid Build Coastguard Worker             if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1)
290*f6dc9357SAndroid Build Coastguard Worker             if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit)
291*f6dc9357SAndroid Build Coastguard Worker               v |= CONV_FLAG;
292*f6dc9357SAndroid Build Coastguard Worker           }
293*f6dc9357SAndroid Build Coastguard Worker           else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
294*f6dc9357SAndroid Build Coastguard Worker           {
295*f6dc9357SAndroid Build Coastguard Worker             // (p->srcLim - src < 4)
296*f6dc9357SAndroid Build Coastguard Worker             // /*
297*f6dc9357SAndroid Build Coastguard Worker             // for non-marker version
298*f6dc9357SAndroid Build Coastguard Worker             p->ip64--; // p->ip = ip - 1;
299*f6dc9357SAndroid Build Coastguard Worker             p->bufs[BCJ2_STREAM_MAIN]--;
300*f6dc9357SAndroid Build Coastguard Worker             src--;
301*f6dc9357SAndroid Build Coastguard Worker             v >>= NUM_SHIFT_BITS;
302*f6dc9357SAndroid Build Coastguard Worker             // (0 < p->srcLim - p->src <= 4)
303*f6dc9357SAndroid Build Coastguard Worker             // */
304*f6dc9357SAndroid Build Coastguard Worker             // v |= MARKER_FLAG; // for marker version
305*f6dc9357SAndroid Build Coastguard Worker             /* (p->state == BCJ2_ENC_STATE_ORIG) */
306*f6dc9357SAndroid Build Coastguard Worker             WRITE_CONTEXT_AND_SRC
307*f6dc9357SAndroid Build Coastguard Worker             return;
308*f6dc9357SAndroid Build Coastguard Worker           }
309*f6dc9357SAndroid Build Coastguard Worker           {
310*f6dc9357SAndroid Build Coastguard Worker             const unsigned c = ((v + 0x17) >> 6) & 1;
311*f6dc9357SAndroid Build Coastguard Worker             CBcj2Prob *prob = p->probs + (unsigned)
312*f6dc9357SAndroid Build Coastguard Worker                 (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
313*f6dc9357SAndroid Build Coastguard Worker             /*
314*f6dc9357SAndroid Build Coastguard Worker                 ((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) :
315*f6dc9357SAndroid Build Coastguard Worker                 ((Byte)v < 0xe8 ? 0 : 1));  // ((v >> 5) & 1));
316*f6dc9357SAndroid Build Coastguard Worker             */
317*f6dc9357SAndroid Build Coastguard Worker             const unsigned ttt = *prob;
318*f6dc9357SAndroid Build Coastguard Worker             const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt;
319*f6dc9357SAndroid Build Coastguard Worker             if ((v & CONV_FLAG) == 0)
320*f6dc9357SAndroid Build Coastguard Worker             {
321*f6dc9357SAndroid Build Coastguard Worker               // static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy);
322*f6dc9357SAndroid Build Coastguard Worker               // v = (Byte)v; // for marker version
323*f6dc9357SAndroid Build Coastguard Worker               p->range = bound;
324*f6dc9357SAndroid Build Coastguard Worker               *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
325*f6dc9357SAndroid Build Coastguard Worker               // WRITE_CONTEXT_AND_SRC
326*f6dc9357SAndroid Build Coastguard Worker               continue;
327*f6dc9357SAndroid Build Coastguard Worker             }
328*f6dc9357SAndroid Build Coastguard Worker             p->low += bound;
329*f6dc9357SAndroid Build Coastguard Worker             p->range -= bound;
330*f6dc9357SAndroid Build Coastguard Worker             *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
331*f6dc9357SAndroid Build Coastguard Worker           }
332*f6dc9357SAndroid Build Coastguard Worker           // p->context = src[3];
333*f6dc9357SAndroid Build Coastguard Worker           {
334*f6dc9357SAndroid Build Coastguard Worker             // const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP);
335*f6dc9357SAndroid Build Coastguard Worker             const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
336*f6dc9357SAndroid Build Coastguard Worker             ip = p->ip64;
337*f6dc9357SAndroid Build Coastguard Worker             v = GetUi32(src); // relat
338*f6dc9357SAndroid Build Coastguard Worker             ip += 4;
339*f6dc9357SAndroid Build Coastguard Worker             p->ip64 = ip;
340*f6dc9357SAndroid Build Coastguard Worker             src += 4;
341*f6dc9357SAndroid Build Coastguard Worker             // p->src = src;
342*f6dc9357SAndroid Build Coastguard Worker             {
343*f6dc9357SAndroid Build Coastguard Worker               const UInt32 absol = (UInt32)ip + v;
344*f6dc9357SAndroid Build Coastguard Worker               Byte *cur = p->bufs[cj];
345*f6dc9357SAndroid Build Coastguard Worker               v >>= 24;
346*f6dc9357SAndroid Build Coastguard Worker               // WRITE_CONTEXT
347*f6dc9357SAndroid Build Coastguard Worker               if (cur == p->lims[cj])
348*f6dc9357SAndroid Build Coastguard Worker               {
349*f6dc9357SAndroid Build Coastguard Worker                 p->state = cj;
350*f6dc9357SAndroid Build Coastguard Worker                 p->tempTarget = absol;
351*f6dc9357SAndroid Build Coastguard Worker                 WRITE_CONTEXT_AND_SRC
352*f6dc9357SAndroid Build Coastguard Worker                 return;
353*f6dc9357SAndroid Build Coastguard Worker               }
354*f6dc9357SAndroid Build Coastguard Worker               SetBe32a(cur, absol)
355*f6dc9357SAndroid Build Coastguard Worker               p->bufs[cj] = cur + 4;
356*f6dc9357SAndroid Build Coastguard Worker             }
357*f6dc9357SAndroid Build Coastguard Worker           }
358*f6dc9357SAndroid Build Coastguard Worker         }
359*f6dc9357SAndroid Build Coastguard Worker       }
360*f6dc9357SAndroid Build Coastguard Worker     } // end of loop
361*f6dc9357SAndroid Build Coastguard Worker   }
362*f6dc9357SAndroid Build Coastguard Worker 
363*f6dc9357SAndroid Build Coastguard Worker   for (; p->flushRem != 0; p->flushRem--)
364*f6dc9357SAndroid Build Coastguard Worker     if (Bcj2_RangeEnc_ShiftLow(p))
365*f6dc9357SAndroid Build Coastguard Worker       return;
366*f6dc9357SAndroid Build Coastguard Worker   p->state = BCJ2_ENC_STATE_FINISHED;
367*f6dc9357SAndroid Build Coastguard Worker }
368*f6dc9357SAndroid Build Coastguard Worker 
369*f6dc9357SAndroid Build Coastguard Worker 
/*
BCJ2 encoder needs to look ahead for up to 4 bytes in (src) buffer.
So base function Bcj2Enc_Encode_2()
  in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with
  (p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim)
Bcj2Enc_Encode() solves that look ahead problem by using p->temp[] buffer.
  so if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(),
    then (p->src == p->srcLim).
  And the caller's code is simpler with Bcj2Enc_Encode().
*/
380*f6dc9357SAndroid Build Coastguard Worker 
381*f6dc9357SAndroid Build Coastguard Worker Z7_NO_INLINE
Bcj2Enc_Encode(CBcj2Enc * p)382*f6dc9357SAndroid Build Coastguard Worker void Bcj2Enc_Encode(CBcj2Enc *p)
383*f6dc9357SAndroid Build Coastguard Worker {
384*f6dc9357SAndroid Build Coastguard Worker   PRF2("\n----")
385*f6dc9357SAndroid Build Coastguard Worker   if (p->tempPos != 0)
386*f6dc9357SAndroid Build Coastguard Worker   {
387*f6dc9357SAndroid Build Coastguard Worker     /* extra: number of bytes that were copied from (src) to (temp) buffer in this call */
388*f6dc9357SAndroid Build Coastguard Worker     unsigned extra = 0;
389*f6dc9357SAndroid Build Coastguard Worker     /* We will touch only minimal required number of bytes in input (src) stream.
390*f6dc9357SAndroid Build Coastguard Worker        So we will add input bytes from (src) stream to temp[] with step of 1 byte.
391*f6dc9357SAndroid Build Coastguard Worker        We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call
392*f6dc9357SAndroid Build Coastguard Worker          in first loop iteration because
393*f6dc9357SAndroid Build Coastguard Worker          - previous call of Bcj2Enc_Encode() could use another (finishMode),
394*f6dc9357SAndroid Build Coastguard Worker          - previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG).
395*f6dc9357SAndroid Build Coastguard Worker        the case with full temp[] buffer (p->tempPos == 4) is possible here.
396*f6dc9357SAndroid Build Coastguard Worker     */
397*f6dc9357SAndroid Build Coastguard Worker     for (;;)
398*f6dc9357SAndroid Build Coastguard Worker     {
399*f6dc9357SAndroid Build Coastguard Worker       // (0 < p->tempPos <= 5) // in non-marker version
400*f6dc9357SAndroid Build Coastguard Worker       /* p->src : the current src data position including extra bytes
401*f6dc9357SAndroid Build Coastguard Worker                   that were copied to temp[] buffer in this call */
402*f6dc9357SAndroid Build Coastguard Worker       const Byte *src = p->src;
403*f6dc9357SAndroid Build Coastguard Worker       const Byte *srcLim = p->srcLim;
404*f6dc9357SAndroid Build Coastguard Worker       const EBcj2Enc_FinishMode finishMode = p->finishMode;
405*f6dc9357SAndroid Build Coastguard Worker       if (src != srcLim)
406*f6dc9357SAndroid Build Coastguard Worker       {
407*f6dc9357SAndroid Build Coastguard Worker         /* if there are some src data after the data copied to temp[],
408*f6dc9357SAndroid Build Coastguard Worker            then we use MODE_CONTINUE for temp data */
409*f6dc9357SAndroid Build Coastguard Worker         p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
410*f6dc9357SAndroid Build Coastguard Worker       }
411*f6dc9357SAndroid Build Coastguard Worker       p->src = p->temp;
412*f6dc9357SAndroid Build Coastguard Worker       p->srcLim = p->temp + p->tempPos;
413*f6dc9357SAndroid Build Coastguard Worker       PRF2("    ")
414*f6dc9357SAndroid Build Coastguard Worker       Bcj2Enc_Encode_2(p);
415*f6dc9357SAndroid Build Coastguard Worker       {
416*f6dc9357SAndroid Build Coastguard Worker         const unsigned num = (unsigned)(p->src - p->temp);
417*f6dc9357SAndroid Build Coastguard Worker         const unsigned tempPos = p->tempPos - num;
418*f6dc9357SAndroid Build Coastguard Worker         unsigned i;
419*f6dc9357SAndroid Build Coastguard Worker         p->tempPos = tempPos;
420*f6dc9357SAndroid Build Coastguard Worker         for (i = 0; i < tempPos; i++)
421*f6dc9357SAndroid Build Coastguard Worker           p->temp[i] = p->temp[(SizeT)i + num];
422*f6dc9357SAndroid Build Coastguard Worker         // tempPos : number of bytes in temp buffer
423*f6dc9357SAndroid Build Coastguard Worker         p->src = src;
424*f6dc9357SAndroid Build Coastguard Worker         p->srcLim = srcLim;
425*f6dc9357SAndroid Build Coastguard Worker         p->finishMode = finishMode;
426*f6dc9357SAndroid Build Coastguard Worker         if (p->state != BCJ2_ENC_STATE_ORIG)
427*f6dc9357SAndroid Build Coastguard Worker         {
428*f6dc9357SAndroid Build Coastguard Worker           // (p->tempPos <= 4) // in non-marker version
429*f6dc9357SAndroid Build Coastguard Worker           /* if (the reason of exit from Bcj2Enc_Encode_2()
430*f6dc9357SAndroid Build Coastguard Worker                  is not BCJ2_ENC_STATE_ORIG),
431*f6dc9357SAndroid Build Coastguard Worker              then we exit from Bcj2Enc_Encode() with same reason */
432*f6dc9357SAndroid Build Coastguard Worker           // optional code begin : we rollback (src) and tempPos, if it's possible:
433*f6dc9357SAndroid Build Coastguard Worker           if (extra >= tempPos)
434*f6dc9357SAndroid Build Coastguard Worker             extra = tempPos;
435*f6dc9357SAndroid Build Coastguard Worker           p->src = src - extra;
436*f6dc9357SAndroid Build Coastguard Worker           p->tempPos = tempPos - extra;
437*f6dc9357SAndroid Build Coastguard Worker           // optional code end : rollback of (src) and tempPos
438*f6dc9357SAndroid Build Coastguard Worker           return;
439*f6dc9357SAndroid Build Coastguard Worker         }
440*f6dc9357SAndroid Build Coastguard Worker         /* (p->tempPos <= 4)
441*f6dc9357SAndroid Build Coastguard Worker            (p->state == BCJ2_ENC_STATE_ORIG)
442*f6dc9357SAndroid Build Coastguard Worker              so encoder needs more data than in temp[] */
443*f6dc9357SAndroid Build Coastguard Worker         if (src == srcLim)
444*f6dc9357SAndroid Build Coastguard Worker           return; // src buffer has no more input data.
445*f6dc9357SAndroid Build Coastguard Worker         /* (src != srcLim)
446*f6dc9357SAndroid Build Coastguard Worker            so we can provide more input data from src for Bcj2Enc_Encode_2() */
447*f6dc9357SAndroid Build Coastguard Worker         if (extra >= tempPos)
448*f6dc9357SAndroid Build Coastguard Worker         {
449*f6dc9357SAndroid Build Coastguard Worker           /* (extra >= tempPos) means that temp buffer contains
450*f6dc9357SAndroid Build Coastguard Worker              only data from src buffer of this call.
451*f6dc9357SAndroid Build Coastguard Worker              So now we can encode without temp buffer */
452*f6dc9357SAndroid Build Coastguard Worker           p->src = src - tempPos; // rollback (src)
453*f6dc9357SAndroid Build Coastguard Worker           p->tempPos = 0;
454*f6dc9357SAndroid Build Coastguard Worker           break;
455*f6dc9357SAndroid Build Coastguard Worker         }
456*f6dc9357SAndroid Build Coastguard Worker         // we append one additional extra byte from (src) to temp[] buffer:
457*f6dc9357SAndroid Build Coastguard Worker         p->temp[tempPos] = *src;
458*f6dc9357SAndroid Build Coastguard Worker         p->tempPos = tempPos + 1;
459*f6dc9357SAndroid Build Coastguard Worker         // (0 < p->tempPos <= 5) // in non-marker version
460*f6dc9357SAndroid Build Coastguard Worker         p->src = src + 1;
461*f6dc9357SAndroid Build Coastguard Worker         extra++;
462*f6dc9357SAndroid Build Coastguard Worker       }
463*f6dc9357SAndroid Build Coastguard Worker     }
464*f6dc9357SAndroid Build Coastguard Worker   }
465*f6dc9357SAndroid Build Coastguard Worker 
466*f6dc9357SAndroid Build Coastguard Worker   PRF2("++++")
467*f6dc9357SAndroid Build Coastguard Worker   // (p->tempPos == 0)
468*f6dc9357SAndroid Build Coastguard Worker   Bcj2Enc_Encode_2(p);
469*f6dc9357SAndroid Build Coastguard Worker   PRF2("====")
470*f6dc9357SAndroid Build Coastguard Worker 
471*f6dc9357SAndroid Build Coastguard Worker   if (p->state == BCJ2_ENC_STATE_ORIG)
472*f6dc9357SAndroid Build Coastguard Worker   {
473*f6dc9357SAndroid Build Coastguard Worker     const Byte *src = p->src;
474*f6dc9357SAndroid Build Coastguard Worker     const Byte *srcLim = p->srcLim;
475*f6dc9357SAndroid Build Coastguard Worker     const unsigned rem = (unsigned)(srcLim - src);
476*f6dc9357SAndroid Build Coastguard Worker     /* (rem <= 4) here.
477*f6dc9357SAndroid Build Coastguard Worker        if (p->src != p->srcLim), then
478*f6dc9357SAndroid Build Coastguard Worker          - we copy non-processed bytes from (p->src) to temp[] buffer,
479*f6dc9357SAndroid Build Coastguard Worker          - we set p->src equal to p->srcLim.
480*f6dc9357SAndroid Build Coastguard Worker     */
481*f6dc9357SAndroid Build Coastguard Worker     if (rem)
482*f6dc9357SAndroid Build Coastguard Worker     {
483*f6dc9357SAndroid Build Coastguard Worker       unsigned i = 0;
484*f6dc9357SAndroid Build Coastguard Worker       p->src = srcLim;
485*f6dc9357SAndroid Build Coastguard Worker       p->tempPos = rem;
486*f6dc9357SAndroid Build Coastguard Worker       // (0 < p->tempPos <= 4)
487*f6dc9357SAndroid Build Coastguard Worker       do
488*f6dc9357SAndroid Build Coastguard Worker         p->temp[i] = src[i];
489*f6dc9357SAndroid Build Coastguard Worker       while (++i != rem);
490*f6dc9357SAndroid Build Coastguard Worker     }
491*f6dc9357SAndroid Build Coastguard Worker     // (p->tempPos <= 4)
492*f6dc9357SAndroid Build Coastguard Worker     // (p->src == p->srcLim)
493*f6dc9357SAndroid Build Coastguard Worker   }
494*f6dc9357SAndroid Build Coastguard Worker }
495*f6dc9357SAndroid Build Coastguard Worker 
/* End-of-file cleanup: remove the macros used by this file so that none of
   them stays defined past this point.
   PRF2, kTopValue, kNumBitModelTotalBits, kBitModelTotal and kNumMoveBits are
   defined near the top of this file. The remaining names (CONV_FLAG,
   MARKER_FLAG, WRITE_CONTEXT, WRITE_CONTEXT_AND_SRC, ONE_ITER, NUM_SHIFT_BITS)
   are presumably defined earlier in this file as well -- TODO confirm.
   Applying #undef to a name that is not currently a macro is ignored. */
496*f6dc9357SAndroid Build Coastguard Worker #undef PRF2
497*f6dc9357SAndroid Build Coastguard Worker #undef CONV_FLAG
498*f6dc9357SAndroid Build Coastguard Worker #undef MARKER_FLAG
499*f6dc9357SAndroid Build Coastguard Worker #undef WRITE_CONTEXT
500*f6dc9357SAndroid Build Coastguard Worker #undef WRITE_CONTEXT_AND_SRC
501*f6dc9357SAndroid Build Coastguard Worker #undef ONE_ITER
502*f6dc9357SAndroid Build Coastguard Worker #undef NUM_SHIFT_BITS
503*f6dc9357SAndroid Build Coastguard Worker #undef kTopValue
504*f6dc9357SAndroid Build Coastguard Worker #undef kNumBitModelTotalBits
505*f6dc9357SAndroid Build Coastguard Worker #undef kBitModelTotal
506*f6dc9357SAndroid Build Coastguard Worker #undef kNumMoveBits
507