xref: /aosp_15_r20/external/libjpeg-turbo/jcphuff.c (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
1 /*
2  * jcphuff.c
3  *
4  * This file was part of the Independent JPEG Group's software:
5  * Copyright (C) 1995-1997, Thomas G. Lane.
6  * libjpeg-turbo Modifications:
7  * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
8  * Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
9  * Copyright (C) 2020, Arm Limited.
10  * Copyright (C) 2021, Alex Richardson.
11  * For conditions of distribution and use, see the accompanying README.ijg
12  * file.
13  *
14  * This file contains Huffman entropy encoding routines for progressive JPEG.
15  *
16  * We do not support output suspension in this module, since the library
17  * currently does not allow multiple-scan files to be written with output
18  * suspension.
19  */
20 
21 #define JPEG_INTERNALS
22 #include "jinclude.h"
23 #include "jpeglib.h"
24 #include "jsimd.h"
25 #include <limits.h>
26 
27 #ifdef HAVE_INTRIN_H
28 #include <intrin.h>
29 #ifdef _MSC_VER
30 #ifdef HAVE_BITSCANFORWARD64
31 #pragma intrinsic(_BitScanForward64)
32 #endif
33 #ifdef HAVE_BITSCANFORWARD
34 #pragma intrinsic(_BitScanForward)
35 #endif
36 #endif
37 #endif
38 
39 #ifdef C_PROGRESSIVE_SUPPORTED
40 
41 /*
42  * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
43  * used for bit counting rather than the lookup table.  This will reduce the
44  * memory footprint by 64k, which is important for some mobile applications
45  * that create many isolated instances of libjpeg-turbo (web browsers, for
46  * instance.)  This may improve performance on some mobile platforms as well.
47  * This feature is enabled by default only on Arm processors, because some x86
48  * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
49  * shown to have a significant performance impact even on the x86 chips that
50  * have a fast implementation of it.  When building for Armv6, you can
51  * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
52  * flags (this defines __thumb__).
53  */
54 
55 /* NOTE: Both GCC and Clang define __GNUC__ */
56 #if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
57     defined(_M_ARM) || defined(_M_ARM64)
58 #if !defined(__thumb__) || defined(__thumb2__)
59 #define USE_CLZ_INTRINSIC
60 #endif
61 #endif
62 
63 #ifdef USE_CLZ_INTRINSIC
64 #if defined(_MSC_VER) && !defined(__clang__)
65 #define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
66 #else
67 #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
68 #endif
69 #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
70 #else
71 #include "jpeg_nbits_table.h"
72 #define JPEG_NBITS(x)          (jpeg_nbits_table[x])
73 #define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
74 #endif
75 
76 
/* Expanded entropy encoder object for progressive Huffman encoding.
 * The public jpeg_entropy_encoder part comes first; start_pass_phuff() obtains
 * this object by casting cinfo->entropy.
 */

typedef struct {
  struct jpeg_entropy_encoder pub; /* public fields */

  /* Pointer to routine to prepare data for encode_mcu_AC_first().
   * start_pass_phuff() points this at either the SIMD routine or
   * encode_mcu_AC_first_prepare().
   */
  void (*AC_first_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, UJCOEF *values, size_t *zerobits);
  /* Pointer to routine to prepare data for encode_mcu_AC_refine().
   * start_pass_phuff() points this at either the SIMD routine or
   * encode_mcu_AC_refine_prepare().
   */
  int (*AC_refine_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, UJCOEF *absvalues, size_t *bits);

  /* Mode flag: TRUE for optimization, FALSE for actual data output */
  boolean gather_statistics;

  /* Bit-level coding status.
   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
   * The encode_mcu_*() routines load them on entry and store them back before
   * returning.
   */
  JOCTET *next_output_byte;     /* => next byte to write in buffer */
  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
  size_t put_buffer;            /* current bit-accumulation buffer */
  int put_bits;                 /* # of bits now in it */
  j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */

  /* Coding status for DC components */
  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */

  /* Coding status for AC components */
  int ac_tbl_no;                /* the table number of the single component */
  unsigned int EOBRUN;          /* run length of EOBs */
  unsigned int BE;              /* # of buffered correction bits before MCU */
  /* Allocated by start_pass_phuff() with MAX_CORR_BITS entries, the first
   * time an AC refinement scan is started.
   */
  char *bit_buffer;             /* buffer for correction bits (1 per char) */
  /* packing correction bits tightly would save some space but cost time... */

  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
  int next_restart_num;         /* next restart number to write (0-7) */

  /* Pointers to derived tables (these workspaces have image lifespan).
   * Since any one scan codes only DC or only AC, we only need one set
   * of tables, not one for DC and one for AC.
   */
  c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];

  /* Statistics tables for optimization; again, one set is enough.
   * Each table holds 257 counters and is allocated and zeroed by
   * start_pass_phuff() when gathering statistics.
   */
  long *count_ptrs[NUM_HUFF_TBLS];
} phuff_entropy_encoder;

/* Convenience pointer type for the encoder object above */
typedef phuff_entropy_encoder *phuff_entropy_ptr;
127 
/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
 * buffer can hold.  Larger sizes may slightly improve compression, but
 * 1000 is already well into the realm of overkill.
 * The minimum safe size is 64 bits.
 */

#define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */

/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
 * We assume that int right shift is unsigned if JLONG right shift is,
 * which should be safe.
 *
 * In the RIGHT_SHIFT_IS_UNSIGNED variant, the sign-extension mask is built
 * from an unsigned all-ones value, because left-shifting the negative value
 * ~0 is undefined behavior in C.  A function that uses that variant must
 * declare ISHIFT_TEMPS among its local variables.
 */

#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS    int ishift_temp;
#define IRIGHT_SHIFT(x, shft) \
  ((ishift_temp = (x)) < 0 ? \
   (ishift_temp >> (shft)) | (int)((~0U) << (16 - (shft))) : \
   (ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
#endif

/* Round v up to the next multiple of p.  The mask arithmetic is only correct
 * when p is a power of 2.  Both arguments are fully parenthesized so that
 * compound expressions can be passed safely.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
153 
154 /* Forward declarations */
155 METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
156                                        JBLOCKROW *MCU_data);
157 METHODDEF(void) encode_mcu_AC_first_prepare
158   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
159    UJCOEF *values, size_t *zerobits);
160 METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
161                                        JBLOCKROW *MCU_data);
162 METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
163                                         JBLOCKROW *MCU_data);
164 METHODDEF(int) encode_mcu_AC_refine_prepare
165   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
166    UJCOEF *absvalues, size_t *bits);
167 METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
168                                         JBLOCKROW *MCU_data);
169 METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
170 METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);
171 
172 
173 /* Count bit loop zeroes */
174 INLINE
METHODDEF(int)175 METHODDEF(int)
176 count_zeroes(size_t *x)
177 {
178 #if defined(HAVE_BUILTIN_CTZL)
179   int result;
180   result = __builtin_ctzl(*x);
181   *x >>= result;
182 #elif defined(HAVE_BITSCANFORWARD64)
183   unsigned long result;
184   _BitScanForward64(&result, *x);
185   *x >>= result;
186 #elif defined(HAVE_BITSCANFORWARD)
187   unsigned long result;
188   _BitScanForward(&result, *x);
189   *x >>= result;
190 #else
191   int result = 0;
192   while ((*x & 1) == 0) {
193     ++result;
194     *x >>= 1;
195   }
196 #endif
197   return (int)result;
198 }
199 
200 
201 /*
202  * Initialize for a Huffman-compressed scan using progressive JPEG.
203  */
204 
205 METHODDEF(void)
start_pass_phuff(j_compress_ptr cinfo,boolean gather_statistics)206 start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
207 {
208   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
209   boolean is_DC_band;
210   int ci, tbl;
211   jpeg_component_info *compptr;
212 
213   entropy->cinfo = cinfo;
214   entropy->gather_statistics = gather_statistics;
215 
216   is_DC_band = (cinfo->Ss == 0);
217 
218   /* We assume jcmaster.c already validated the scan parameters. */
219 
220   /* Select execution routines */
221   if (cinfo->Ah == 0) {
222     if (is_DC_band)
223       entropy->pub.encode_mcu = encode_mcu_DC_first;
224     else
225       entropy->pub.encode_mcu = encode_mcu_AC_first;
226     if (jsimd_can_encode_mcu_AC_first_prepare())
227       entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
228     else
229       entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
230   } else {
231     if (is_DC_band)
232       entropy->pub.encode_mcu = encode_mcu_DC_refine;
233     else {
234       entropy->pub.encode_mcu = encode_mcu_AC_refine;
235       if (jsimd_can_encode_mcu_AC_refine_prepare())
236         entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
237       else
238         entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
239       /* AC refinement needs a correction bit buffer */
240       if (entropy->bit_buffer == NULL)
241         entropy->bit_buffer = (char *)
242           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
243                                       MAX_CORR_BITS * sizeof(char));
244     }
245   }
246   if (gather_statistics)
247     entropy->pub.finish_pass = finish_pass_gather_phuff;
248   else
249     entropy->pub.finish_pass = finish_pass_phuff;
250 
251   /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
252    * for AC coefficients.
253    */
254   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
255     compptr = cinfo->cur_comp_info[ci];
256     /* Initialize DC predictions to 0 */
257     entropy->last_dc_val[ci] = 0;
258     /* Get table index */
259     if (is_DC_band) {
260       if (cinfo->Ah != 0)       /* DC refinement needs no table */
261         continue;
262       tbl = compptr->dc_tbl_no;
263     } else {
264       entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
265     }
266     if (gather_statistics) {
267       /* Check for invalid table index */
268       /* (make_c_derived_tbl does this in the other path) */
269       if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
270         ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
271       /* Allocate and zero the statistics tables */
272       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
273       if (entropy->count_ptrs[tbl] == NULL)
274         entropy->count_ptrs[tbl] = (long *)
275           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
276                                       257 * sizeof(long));
277       memset(entropy->count_ptrs[tbl], 0, 257 * sizeof(long));
278     } else {
279       /* Compute derived values for Huffman table */
280       /* We may do this more than once for a table, but it's not expensive */
281       jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
282                               &entropy->derived_tbls[tbl]);
283     }
284   }
285 
286   /* Initialize AC stuff */
287   entropy->EOBRUN = 0;
288   entropy->BE = 0;
289 
290   /* Initialize bit buffer to empty */
291   entropy->put_buffer = 0;
292   entropy->put_bits = 0;
293 
294   /* Initialize restart stuff */
295   entropy->restarts_to_go = cinfo->restart_interval;
296   entropy->next_restart_num = 0;
297 }
298 
299 
300 /* Outputting bytes to the file.
301  * NB: these must be called only when actually outputting,
302  * that is, entropy->gather_statistics == FALSE.
303  */
304 
/* Store one byte in the output buffer, and hand the buffer to dump_buffer()
 * once its last free slot has been used.
 */
#define emit_byte(entropy, val) { \
  *(entropy)->next_output_byte++ = (JOCTET)(val); \
  (entropy)->free_in_buffer--; \
  if ((entropy)->free_in_buffer == 0) \
    dump_buffer(entropy); \
}
311 
312 
313 LOCAL(void)
dump_buffer(phuff_entropy_ptr entropy)314 dump_buffer(phuff_entropy_ptr entropy)
315 /* Empty the output buffer; we do not support suspension in this module. */
316 {
317   struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
318 
319   if (!(*dest->empty_output_buffer) (entropy->cinfo))
320     ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
321   /* After a successful buffer dump, must reset buffer pointers */
322   entropy->next_output_byte = dest->next_output_byte;
323   entropy->free_in_buffer = dest->free_in_buffer;
324 }
325 
326 
327 /* Outputting bits to the file */
328 
329 /* Only the right 24 bits of put_buffer are used; the valid bits are
330  * left-justified in this part.  At most 16 bits can be passed to emit_bits
331  * in one call, and we never retain more than 7 bits in put_buffer
332  * between calls, so 24 bits are sufficient.
333  */
334 
335 LOCAL(void)
emit_bits(phuff_entropy_ptr entropy,unsigned int code,int size)336 emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
337 /* Emit some bits, unless we are in gather mode */
338 {
339   /* This routine is heavily used, so it's worth coding tightly. */
340   register size_t put_buffer = (size_t)code;
341   register int put_bits = entropy->put_bits;
342 
343   /* if size is 0, caller used an invalid Huffman table entry */
344   if (size == 0)
345     ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
346 
347   if (entropy->gather_statistics)
348     return;                     /* do nothing if we're only getting stats */
349 
350   put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
351 
352   put_bits += size;             /* new number of bits in buffer */
353 
354   put_buffer <<= 24 - put_bits; /* align incoming bits */
355 
356   put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
357 
358   while (put_bits >= 8) {
359     int c = (int)((put_buffer >> 16) & 0xFF);
360 
361     emit_byte(entropy, c);
362     if (c == 0xFF) {            /* need to stuff a zero byte? */
363       emit_byte(entropy, 0);
364     }
365     put_buffer <<= 8;
366     put_bits -= 8;
367   }
368 
369   entropy->put_buffer = put_buffer; /* update variables */
370   entropy->put_bits = put_bits;
371 }
372 
373 
374 LOCAL(void)
flush_bits(phuff_entropy_ptr entropy)375 flush_bits(phuff_entropy_ptr entropy)
376 {
377   emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
378   entropy->put_buffer = 0;     /* and reset bit-buffer to empty */
379   entropy->put_bits = 0;
380 }
381 
382 
383 /*
384  * Emit (or just count) a Huffman symbol.
385  */
386 
387 LOCAL(void)
emit_symbol(phuff_entropy_ptr entropy,int tbl_no,int symbol)388 emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
389 {
390   if (entropy->gather_statistics)
391     entropy->count_ptrs[tbl_no][symbol]++;
392   else {
393     c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];
394     emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
395   }
396 }
397 
398 
399 /*
400  * Emit bits from a correction bit buffer.
401  */
402 
403 LOCAL(void)
emit_buffered_bits(phuff_entropy_ptr entropy,char * bufstart,unsigned int nbits)404 emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
405                    unsigned int nbits)
406 {
407   if (entropy->gather_statistics)
408     return;                     /* no real work */
409 
410   while (nbits > 0) {
411     emit_bits(entropy, (unsigned int)(*bufstart), 1);
412     bufstart++;
413     nbits--;
414   }
415 }
416 
417 
418 /*
419  * Emit any pending EOBRUN symbol.
420  */
421 
422 LOCAL(void)
emit_eobrun(phuff_entropy_ptr entropy)423 emit_eobrun(phuff_entropy_ptr entropy)
424 {
425   register int temp, nbits;
426 
427   if (entropy->EOBRUN > 0) {    /* if there is any pending EOBRUN */
428     temp = entropy->EOBRUN;
429     nbits = JPEG_NBITS_NONZERO(temp) - 1;
430     /* safety check: shouldn't happen given limited correction-bit buffer */
431     if (nbits > 14)
432       ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
433 
434     emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
435     if (nbits)
436       emit_bits(entropy, entropy->EOBRUN, nbits);
437 
438     entropy->EOBRUN = 0;
439 
440     /* Emit any buffered correction bits */
441     emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
442     entropy->BE = 0;
443   }
444 }
445 
446 
447 /*
448  * Emit a restart marker & resynchronize predictions.
449  */
450 
451 LOCAL(void)
emit_restart(phuff_entropy_ptr entropy,int restart_num)452 emit_restart(phuff_entropy_ptr entropy, int restart_num)
453 {
454   int ci;
455 
456   emit_eobrun(entropy);
457 
458   if (!entropy->gather_statistics) {
459     flush_bits(entropy);
460     emit_byte(entropy, 0xFF);
461     emit_byte(entropy, JPEG_RST0 + restart_num);
462   }
463 
464   if (entropy->cinfo->Ss == 0) {
465     /* Re-initialize DC predictions to 0 */
466     for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
467       entropy->last_dc_val[ci] = 0;
468   } else {
469     /* Re-initialize all AC-related fields to 0 */
470     entropy->EOBRUN = 0;
471     entropy->BE = 0;
472   }
473 }
474 
475 
476 /*
477  * MCU encoding for DC initial scan (either spectral selection,
478  * or first pass of successive approximation).
479  */
480 
481 METHODDEF(boolean)
encode_mcu_DC_first(j_compress_ptr cinfo,JBLOCKROW * MCU_data)482 encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
483 {
484   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
485   register int temp, temp2, temp3;
486   register int nbits;
487   int blkn, ci;
488   int Al = cinfo->Al;
489   JBLOCKROW block;
490   jpeg_component_info *compptr;
491   ISHIFT_TEMPS
492 
493   entropy->next_output_byte = cinfo->dest->next_output_byte;
494   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
495 
496   /* Emit restart marker if needed */
497   if (cinfo->restart_interval)
498     if (entropy->restarts_to_go == 0)
499       emit_restart(entropy, entropy->next_restart_num);
500 
501   /* Encode the MCU data blocks */
502   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
503     block = MCU_data[blkn];
504     ci = cinfo->MCU_membership[blkn];
505     compptr = cinfo->cur_comp_info[ci];
506 
507     /* Compute the DC value after the required point transform by Al.
508      * This is simply an arithmetic right shift.
509      */
510     temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);
511 
512     /* DC differences are figured on the point-transformed values. */
513     temp = temp2 - entropy->last_dc_val[ci];
514     entropy->last_dc_val[ci] = temp2;
515 
516     /* Encode the DC coefficient difference per section G.1.2.1 */
517 
518     /* This is a well-known technique for obtaining the absolute value without
519      * a branch.  It is derived from an assembly language technique presented
520      * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,
521      * 1997 by Agner Fog.
522      */
523     temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
524     temp ^= temp3;
525     temp -= temp3;              /* temp is abs value of input */
526     /* For a negative input, want temp2 = bitwise complement of abs(input) */
527     temp2 = temp ^ temp3;
528 
529     /* Find the number of bits needed for the magnitude of the coefficient */
530     nbits = JPEG_NBITS(temp);
531     /* Check for out-of-range coefficient values.
532      * Since we're encoding a difference, the range limit is twice as much.
533      */
534     if (nbits > MAX_COEF_BITS + 1)
535       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
536 
537     /* Count/emit the Huffman-coded symbol for the number of bits */
538     emit_symbol(entropy, compptr->dc_tbl_no, nbits);
539 
540     /* Emit that number of bits of the value, if positive, */
541     /* or the complement of its magnitude, if negative. */
542     if (nbits)                  /* emit_bits rejects calls with size 0 */
543       emit_bits(entropy, (unsigned int)temp2, nbits);
544   }
545 
546   cinfo->dest->next_output_byte = entropy->next_output_byte;
547   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
548 
549   /* Update restart-interval state too */
550   if (cinfo->restart_interval) {
551     if (entropy->restarts_to_go == 0) {
552       entropy->restarts_to_go = cinfo->restart_interval;
553       entropy->next_restart_num++;
554       entropy->next_restart_num &= 7;
555     }
556     entropy->restarts_to_go--;
557   }
558 
559   return TRUE;
560 }
561 
562 
/*
 * Data preparation for encode_mcu_AC_first().
 *
 * The macro relies on these names in the enclosing function: block,
 * jpeg_natural_order_start, Al, values, zerobits, k, temp and temp2.
 */

#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    if (temp != 0) { \
      /* The point transform by Al is, for AC coefficients, an integer \
       * division that rounds towards 0.  The portable way to get that in C \
       * is to shift the absolute value, so the shift is done between \
       * computing the magnitude (temp) and the output bits. \
       */ \
      temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
      temp = ((temp ^ temp2) - temp2) >> Al;  /* transformed magnitude */ \
      /* A nonzero coefficient can become zero after the point transform; \
       * such a coefficient is treated like a zero one. \
       */ \
      if (temp != 0) { \
        values[k] = (UJCOEF)temp; \
        /* A negative coefficient is represented by the bitwise complement \
         * of its magnitude. \
         */ \
        values[k + DCTSIZE2] = (UJCOEF)(temp2 ^ temp); \
        zerobits |= ((size_t)1U) << k; \
      } \
    } \
  } \
}
591 
592 METHODDEF(void)
encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * values,size_t * bits)593 encode_mcu_AC_first_prepare(const JCOEF *block,
594                             const int *jpeg_natural_order_start, int Sl,
595                             int Al, UJCOEF *values, size_t *bits)
596 {
597   register int k, temp, temp2;
598   size_t zerobits = 0U;
599   int Sl0 = Sl;
600 
601 #if SIZEOF_SIZE_T == 4
602   if (Sl0 > 32)
603     Sl0 = 32;
604 #endif
605 
606   COMPUTE_ABSVALUES_AC_FIRST(Sl0);
607 
608   bits[0] = zerobits;
609 #if SIZEOF_SIZE_T == 4
610   zerobits = 0U;
611 
612   if (Sl > 32) {
613     Sl -= 32;
614     jpeg_natural_order_start += 32;
615     values += 32;
616 
617     COMPUTE_ABSVALUES_AC_FIRST(Sl);
618   }
619   bits[1] = zerobits;
620 #endif
621 }
622 
/*
 * MCU encoding for AC initial scan (either spectral selection,
 * or first pass of successive approximation).
 *
 * ENCODE_COEFS_AC_FIRST encodes every coefficient whose bit is set in
 * zerobits.  "label" is pasted in right after the step that skips over zero
 * coefficients, so that a caller can jump into the loop with r and cvalue
 * already positioned.  The macro relies on these names in the enclosing
 * function: cinfo, entropy, zerobits, cvalue, r, temp, temp2 and nbits.
 */

#define ENCODE_COEFS_AC_FIRST(label) { \
  while (zerobits) { \
    /* Skip the zero coefficients in front of the next one to encode */ \
    r = count_zeroes(&zerobits); \
    cvalue += r; \
label \
    temp  = cvalue[0]; \
    temp2 = cvalue[DCTSIZE2]; \
    \
    /* A run longer than 15 needs special run-length-16 codes (0xF0) */ \
    for (; r > 15; r -= 16) \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
    \
    /* Number of bits needed for the magnitude, which has at least one 1 bit */ \
    nbits = JPEG_NBITS_NONZERO(temp); \
    /* Reject out-of-range coefficient values */ \
    if (nbits > MAX_COEF_BITS) \
      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    \
    /* The symbol combines the run length and the bit count */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    \
    /* The value bits follow: the value itself if it is positive, or the \
     * complement of its magnitude if it is negative. \
     */ \
    emit_bits(entropy, (unsigned int)temp2, nbits); \
    \
    /* Step past the coefficient just encoded */ \
    zerobits >>= 1; \
    cvalue++; \
  } \
}
659 
660 METHODDEF(boolean)
encode_mcu_AC_first(j_compress_ptr cinfo,JBLOCKROW * MCU_data)661 encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
662 {
663   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
664   register int temp, temp2;
665   register int nbits, r;
666   int Sl = cinfo->Se - cinfo->Ss + 1;
667   int Al = cinfo->Al;
668   UJCOEF values_unaligned[2 * DCTSIZE2 + 15];
669   UJCOEF *values;
670   const UJCOEF *cvalue;
671   size_t zerobits;
672   size_t bits[8 / SIZEOF_SIZE_T];
673 
674   entropy->next_output_byte = cinfo->dest->next_output_byte;
675   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
676 
677   /* Emit restart marker if needed */
678   if (cinfo->restart_interval)
679     if (entropy->restarts_to_go == 0)
680       emit_restart(entropy, entropy->next_restart_num);
681 
682 #ifdef WITH_SIMD
683   cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16);
684 #else
685   /* Not using SIMD, so alignment is not needed */
686   cvalue = values = values_unaligned;
687 #endif
688 
689   /* Prepare data */
690   entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
691                             Sl, Al, values, bits);
692 
693   zerobits = bits[0];
694 #if SIZEOF_SIZE_T == 4
695   zerobits |= bits[1];
696 #endif
697 
698   /* Emit any pending EOBRUN */
699   if (zerobits && (entropy->EOBRUN > 0))
700     emit_eobrun(entropy);
701 
702 #if SIZEOF_SIZE_T == 4
703   zerobits = bits[0];
704 #endif
705 
706   /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
707 
708   ENCODE_COEFS_AC_FIRST((void)0;);
709 
710 #if SIZEOF_SIZE_T == 4
711   zerobits = bits[1];
712   if (zerobits) {
713     int diff = ((values + DCTSIZE2 / 2) - cvalue);
714     r = count_zeroes(&zerobits);
715     r += diff;
716     cvalue += r;
717     goto first_iter_ac_first;
718   }
719 
720   ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
721 #endif
722 
723   if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
724     entropy->EOBRUN++;          /* count an EOB */
725     if (entropy->EOBRUN == 0x7FFF)
726       emit_eobrun(entropy);     /* force it out to avoid overflow */
727   }
728 
729   cinfo->dest->next_output_byte = entropy->next_output_byte;
730   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
731 
732   /* Update restart-interval state too */
733   if (cinfo->restart_interval) {
734     if (entropy->restarts_to_go == 0) {
735       entropy->restarts_to_go = cinfo->restart_interval;
736       entropy->next_restart_num++;
737       entropy->next_restart_num &= 7;
738     }
739     entropy->restarts_to_go--;
740   }
741 
742   return TRUE;
743 }
744 
745 
746 /*
747  * MCU encoding for DC successive approximation refinement scan.
748  * Note: we assume such scans can be multi-component, although the spec
749  * is not very clear on the point.
750  */
751 
752 METHODDEF(boolean)
encode_mcu_DC_refine(j_compress_ptr cinfo,JBLOCKROW * MCU_data)753 encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
754 {
755   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
756   register int temp;
757   int blkn;
758   int Al = cinfo->Al;
759   JBLOCKROW block;
760 
761   entropy->next_output_byte = cinfo->dest->next_output_byte;
762   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
763 
764   /* Emit restart marker if needed */
765   if (cinfo->restart_interval)
766     if (entropy->restarts_to_go == 0)
767       emit_restart(entropy, entropy->next_restart_num);
768 
769   /* Encode the MCU data blocks */
770   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
771     block = MCU_data[blkn];
772 
773     /* We simply emit the Al'th bit of the DC coefficient value. */
774     temp = (*block)[0];
775     emit_bits(entropy, (unsigned int)(temp >> Al), 1);
776   }
777 
778   cinfo->dest->next_output_byte = entropy->next_output_byte;
779   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
780 
781   /* Update restart-interval state too */
782   if (cinfo->restart_interval) {
783     if (entropy->restarts_to_go == 0) {
784       entropy->restarts_to_go = cinfo->restart_interval;
785       entropy->next_restart_num++;
786       entropy->next_restart_num &= 7;
787     }
788     entropy->restarts_to_go--;
789   }
790 
791   return TRUE;
792 }
793 
794 
/*
 * Data preparation for encode_mcu_AC_refine().
 *
 * The macro relies on these names in the enclosing function: block,
 * jpeg_natural_order_start, Al, absvalues, zerobits, signbits, EOB, k, temp
 * and temp2.
 */

#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
  /* A pre-pass over the coefficients determines their transformed absolute \
   * values and the EOB position. \
   */ \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    /* The point transform by Al is, for AC coefficients, an integer \
     * division that rounds towards 0.  The portable way to get that in C \
     * is to shift the absolute value. \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp = ((temp ^ temp2) - temp2) >> Al;  /* transformed magnitude */ \
    absvalues[k] = (UJCOEF)temp; /* the main pass reads this */ \
    if (temp == 0) \
      continue; \
    zerobits |= ((size_t)1U) << k; \
    /* temp2 + 1 is 0 for a negative coefficient and 1 otherwise */ \
    signbits |= ((size_t)(temp2 + 1)) << k; \
    if (temp == 1) \
      EOB = k + koffset;        /* index of the last newly-nonzero coef */ \
  } \
}
822 
823 METHODDEF(int)
encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * absvalues,size_t * bits)824 encode_mcu_AC_refine_prepare(const JCOEF *block,
825                              const int *jpeg_natural_order_start, int Sl,
826                              int Al, UJCOEF *absvalues, size_t *bits)
827 {
828   register int k, temp, temp2;
829   int EOB = 0;
830   size_t zerobits = 0U, signbits = 0U;
831   int Sl0 = Sl;
832 
833 #if SIZEOF_SIZE_T == 4
834   if (Sl0 > 32)
835     Sl0 = 32;
836 #endif
837 
838   COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);
839 
840   bits[0] = zerobits;
841 #if SIZEOF_SIZE_T == 8
842   bits[1] = signbits;
843 #else
844   bits[2] = signbits;
845 
846   zerobits = 0U;
847   signbits = 0U;
848 
849   if (Sl > 32) {
850     Sl -= 32;
851     jpeg_natural_order_start += 32;
852     absvalues += 32;
853 
854     COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);
855   }
856 
857   bits[1] = zerobits;
858   bits[3] = signbits;
859 #endif
860 
861   return EOB;
862 }
863 
864 
/*
 * MCU encoding for AC successive approximation refinement scan.
 *
 * ENCODE_COEFS_AC_REFINE visits every coefficient whose bit is set in
 * zerobits.  A coefficient with absolute value greater than 1 only
 * contributes a correction bit to BR_buffer; a coefficient with absolute
 * value 1 is emitted as a run-length symbol and a sign bit, followed by the
 * correction bits buffered so far.
 *
 * The macro relies on these names in the enclosing function: entropy,
 * zerobits, signbits, cabsvalue, EOBPTR, BR_buffer, BR, r, idx and temp.
 * "label" is pasted in between the skip step and the ZRL loop, presumably so
 * that the caller can jump into the loop the way encode_mcu_AC_first() does
 * with ENCODE_COEFS_AC_FIRST -- TODO confirm against the caller.
 */

#define ENCODE_COEFS_AC_REFINE(label) { \
  while (zerobits) { \
    /* Skip ahead to the next coefficient flagged in zerobits, keeping \
     * signbits and cabsvalue in step and adding the skipped count to r. \
     */ \
    idx = count_zeroes(&zerobits); \
    r += idx; \
    cabsvalue += idx; \
    signbits >>= idx; \
label \
    /* Emit any required ZRLs, but not if they can be folded into EOB */ \
    while (r > 15 && (cabsvalue <= EOBPTR)) { \
      /* emit any pending EOBRUN and the BE correction bits */ \
      emit_eobrun(entropy); \
      /* Emit ZRL */ \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
      /* Emit buffered correction bits that must be associated with ZRL */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
      BR = 0; \
    } \
    \
    temp = *cabsvalue++; \
    \
    /* If the coef was previously nonzero, it only needs a correction bit. \
     * NOTE: a straight translation of the spec's figure G.7 would suggest \
     * that we also need to test r > 15.  But if r > 15, we can only get here \
     * if k > EOB, which implies that this coefficient is not 1. \
     */ \
    if (temp > 1) { \
      /* The correction bit is the next bit of the absolute value. */ \
      BR_buffer[BR++] = (char)(temp & 1); \
      /* Step past this coefficient; r is left unchanged here */ \
      signbits >>= 1; \
      zerobits >>= 1; \
      continue; \
    } \
    \
    /* Emit any pending EOBRUN and the BE correction bits */ \
    emit_eobrun(entropy); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
    \
    /* Emit output bit for newly-nonzero coef */ \
    temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
    emit_bits(entropy, (unsigned int)temp, 1); \
    \
    /* Emit buffered correction bits that must be associated with this code */ \
    emit_buffered_bits(entropy, BR_buffer, BR); \
    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
    BR = 0; \
    r = 0;                      /* reset zero run length */ \
    /* Step past the coefficient just encoded */ \
    signbits >>= 1; \
    zerobits >>= 1; \
  } \
}
923 
924 METHODDEF(boolean)
encode_mcu_AC_refine(j_compress_ptr cinfo,JBLOCKROW * MCU_data)925 encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
926 {
927   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
928   register int temp, r, idx;
929   char *BR_buffer;
930   unsigned int BR;
931   int Sl = cinfo->Se - cinfo->Ss + 1;
932   int Al = cinfo->Al;
933   UJCOEF absvalues_unaligned[DCTSIZE2 + 15];
934   UJCOEF *absvalues;
935   const UJCOEF *cabsvalue, *EOBPTR;
936   size_t zerobits, signbits;
937   size_t bits[16 / SIZEOF_SIZE_T];
938 
939   entropy->next_output_byte = cinfo->dest->next_output_byte;
940   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
941 
942   /* Emit restart marker if needed */
943   if (cinfo->restart_interval)
944     if (entropy->restarts_to_go == 0)
945       emit_restart(entropy, entropy->next_restart_num);
946 
947 #ifdef WITH_SIMD
948   cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
949 #else
950   /* Not using SIMD, so alignment is not needed */
951   cabsvalue = absvalues = absvalues_unaligned;
952 #endif
953 
954   /* Prepare data */
955   EOBPTR = absvalues +
956     entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
957                                Sl, Al, absvalues, bits);
958 
959   /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
960 
961   r = 0;                        /* r = run length of zeros */
962   BR = 0;                       /* BR = count of buffered bits added now */
963   BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
964 
965   zerobits = bits[0];
966 #if SIZEOF_SIZE_T == 8
967   signbits = bits[1];
968 #else
969   signbits = bits[2];
970 #endif
971   ENCODE_COEFS_AC_REFINE((void)0;);
972 
973 #if SIZEOF_SIZE_T == 4
974   zerobits = bits[1];
975   signbits = bits[3];
976 
977   if (zerobits) {
978     int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
979     idx = count_zeroes(&zerobits);
980     signbits >>= idx;
981     idx += diff;
982     r += idx;
983     cabsvalue += idx;
984     goto first_iter_ac_refine;
985   }
986 
987   ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
988 #endif
989 
990   r |= (int)((absvalues + Sl) - cabsvalue);
991 
992   if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
993     entropy->EOBRUN++;          /* count an EOB */
994     entropy->BE += BR;          /* concat my correction bits to older ones */
995     /* We force out the EOB if we risk either:
996      * 1. overflow of the EOB counter;
997      * 2. overflow of the correction bit buffer during the next MCU.
998      */
999     if (entropy->EOBRUN == 0x7FFF ||
1000         entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
1001       emit_eobrun(entropy);
1002   }
1003 
1004   cinfo->dest->next_output_byte = entropy->next_output_byte;
1005   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1006 
1007   /* Update restart-interval state too */
1008   if (cinfo->restart_interval) {
1009     if (entropy->restarts_to_go == 0) {
1010       entropy->restarts_to_go = cinfo->restart_interval;
1011       entropy->next_restart_num++;
1012       entropy->next_restart_num &= 7;
1013     }
1014     entropy->restarts_to_go--;
1015   }
1016 
1017   return TRUE;
1018 }
1019 
1020 
1021 /*
1022  * Finish up at the end of a Huffman-compressed progressive scan.
1023  */
1024 
1025 METHODDEF(void)
finish_pass_phuff(j_compress_ptr cinfo)1026 finish_pass_phuff(j_compress_ptr cinfo)
1027 {
1028   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1029 
1030   entropy->next_output_byte = cinfo->dest->next_output_byte;
1031   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
1032 
1033   /* Flush out any buffered data */
1034   emit_eobrun(entropy);
1035   flush_bits(entropy);
1036 
1037   cinfo->dest->next_output_byte = entropy->next_output_byte;
1038   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1039 }
1040 
1041 
1042 /*
1043  * Finish up a statistics-gathering pass and create the new Huffman tables.
1044  */
1045 
1046 METHODDEF(void)
finish_pass_gather_phuff(j_compress_ptr cinfo)1047 finish_pass_gather_phuff(j_compress_ptr cinfo)
1048 {
1049   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1050   boolean is_DC_band;
1051   int ci, tbl;
1052   jpeg_component_info *compptr;
1053   JHUFF_TBL **htblptr;
1054   boolean did[NUM_HUFF_TBLS];
1055 
1056   /* Flush out buffered data (all we care about is counting the EOB symbol) */
1057   emit_eobrun(entropy);
1058 
1059   is_DC_band = (cinfo->Ss == 0);
1060 
1061   /* It's important not to apply jpeg_gen_optimal_table more than once
1062    * per table, because it clobbers the input frequency counts!
1063    */
1064   memset(did, 0, sizeof(did));
1065 
1066   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
1067     compptr = cinfo->cur_comp_info[ci];
1068     if (is_DC_band) {
1069       if (cinfo->Ah != 0)       /* DC refinement needs no table */
1070         continue;
1071       tbl = compptr->dc_tbl_no;
1072     } else {
1073       tbl = compptr->ac_tbl_no;
1074     }
1075     if (!did[tbl]) {
1076       if (is_DC_band)
1077         htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
1078       else
1079         htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];
1080       if (*htblptr == NULL)
1081         *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
1082       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
1083       did[tbl] = TRUE;
1084     }
1085   }
1086 }
1087 
1088 
1089 /*
1090  * Module initialization routine for progressive Huffman entropy encoding.
1091  */
1092 
1093 GLOBAL(void)
jinit_phuff_encoder(j_compress_ptr cinfo)1094 jinit_phuff_encoder(j_compress_ptr cinfo)
1095 {
1096   phuff_entropy_ptr entropy;
1097   int i;
1098 
1099   entropy = (phuff_entropy_ptr)
1100     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1101                                 sizeof(phuff_entropy_encoder));
1102   cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
1103   entropy->pub.start_pass = start_pass_phuff;
1104 
1105   /* Mark tables unallocated */
1106   for (i = 0; i < NUM_HUFF_TBLS; i++) {
1107     entropy->derived_tbls[i] = NULL;
1108     entropy->count_ptrs[i] = NULL;
1109   }
1110   entropy->bit_buffer = NULL;   /* needed only in AC refinement scan */
1111 }
1112 
1113 #endif /* C_PROGRESSIVE_SUPPORTED */
1114