1*b2055c35SXin Li // Copyright 2012 Google Inc. All Rights Reserved.
2*b2055c35SXin Li //
3*b2055c35SXin Li // Use of this source code is governed by a BSD-style license
4*b2055c35SXin Li // that can be found in the COPYING file in the root of the source
5*b2055c35SXin Li // tree. An additional intellectual property rights grant can be found
6*b2055c35SXin Li // in the file PATENTS. All contributing project authors may
7*b2055c35SXin Li // be found in the AUTHORS file in the root of the source tree.
8*b2055c35SXin Li // -----------------------------------------------------------------------------
9*b2055c35SXin Li //
10*b2055c35SXin Li // Author: Jyrki Alakuijala ([email protected])
11*b2055c35SXin Li //
12*b2055c35SXin Li
13*b2055c35SXin Li #include "src/enc/backward_references_enc.h"
14*b2055c35SXin Li
15*b2055c35SXin Li #include <assert.h>
16*b2055c35SXin Li #include <float.h>
17*b2055c35SXin Li #include <math.h>
18*b2055c35SXin Li
19*b2055c35SXin Li #include "src/dsp/dsp.h"
20*b2055c35SXin Li #include "src/dsp/lossless.h"
21*b2055c35SXin Li #include "src/dsp/lossless_common.h"
22*b2055c35SXin Li #include "src/enc/histogram_enc.h"
23*b2055c35SXin Li #include "src/enc/vp8i_enc.h"
24*b2055c35SXin Li #include "src/utils/color_cache_utils.h"
25*b2055c35SXin Li #include "src/utils/utils.h"
26*b2055c35SXin Li #include "src/webp/encode.h"
27*b2055c35SXin Li
28*b2055c35SXin Li #define MIN_BLOCK_SIZE 256 // minimum block size for backward references
29*b2055c35SXin Li
30*b2055c35SXin Li #define MAX_ENTROPY (1e30f)
31*b2055c35SXin Li
32*b2055c35SXin Li // 1M window (4M bytes) minus 120 special codes for short distances.
33*b2055c35SXin Li #define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120)
34*b2055c35SXin Li
35*b2055c35SXin Li // Minimum number of pixels for which it is cheaper to encode a
36*b2055c35SXin Li // distance + length instead of each pixel as a literal.
37*b2055c35SXin Li #define MIN_LENGTH 4
38*b2055c35SXin Li
39*b2055c35SXin Li // -----------------------------------------------------------------------------
40*b2055c35SXin Li
41*b2055c35SXin Li static const uint8_t plane_to_code_lut[128] = {
42*b2055c35SXin Li 96, 73, 55, 39, 23, 13, 5, 1, 255, 255, 255, 255, 255, 255, 255, 255,
43*b2055c35SXin Li 101, 78, 58, 42, 26, 16, 8, 2, 0, 3, 9, 17, 27, 43, 59, 79,
44*b2055c35SXin Li 102, 86, 62, 46, 32, 20, 10, 6, 4, 7, 11, 21, 33, 47, 63, 87,
45*b2055c35SXin Li 105, 90, 70, 52, 37, 28, 18, 14, 12, 15, 19, 29, 38, 53, 71, 91,
46*b2055c35SXin Li 110, 99, 82, 66, 48, 35, 30, 24, 22, 25, 31, 36, 49, 67, 83, 100,
47*b2055c35SXin Li 115, 108, 94, 76, 64, 50, 44, 40, 34, 41, 45, 51, 65, 77, 95, 109,
48*b2055c35SXin Li 118, 113, 103, 92, 80, 68, 60, 56, 54, 57, 61, 69, 81, 93, 104, 114,
49*b2055c35SXin Li 119, 116, 111, 106, 97, 88, 84, 74, 72, 75, 85, 89, 98, 107, 112, 117
50*b2055c35SXin Li };
51*b2055c35SXin Li
52*b2055c35SXin Li extern int VP8LDistanceToPlaneCode(int xsize, int dist);
VP8LDistanceToPlaneCode(int xsize,int dist)53*b2055c35SXin Li int VP8LDistanceToPlaneCode(int xsize, int dist) {
54*b2055c35SXin Li const int yoffset = dist / xsize;
55*b2055c35SXin Li const int xoffset = dist - yoffset * xsize;
56*b2055c35SXin Li if (xoffset <= 8 && yoffset < 8) {
57*b2055c35SXin Li return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
58*b2055c35SXin Li } else if (xoffset > xsize - 8 && yoffset < 7) {
59*b2055c35SXin Li return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
60*b2055c35SXin Li }
61*b2055c35SXin Li return dist + 120;
62*b2055c35SXin Li }
63*b2055c35SXin Li
64*b2055c35SXin Li // Returns the exact index where array1 and array2 are different. For an index
65*b2055c35SXin Li // inferior or equal to best_len_match, the return value just has to be strictly
66*b2055c35SXin Li // inferior to best_len_match. The current behavior is to return 0 if this index
67*b2055c35SXin Li // is best_len_match, and the index itself otherwise.
68*b2055c35SXin Li // If no two elements are the same, it returns max_limit.
FindMatchLength(const uint32_t * const array1,const uint32_t * const array2,int best_len_match,int max_limit)69*b2055c35SXin Li static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
70*b2055c35SXin Li const uint32_t* const array2,
71*b2055c35SXin Li int best_len_match, int max_limit) {
72*b2055c35SXin Li // Before 'expensive' linear match, check if the two arrays match at the
73*b2055c35SXin Li // current best length index.
74*b2055c35SXin Li if (array1[best_len_match] != array2[best_len_match]) return 0;
75*b2055c35SXin Li
76*b2055c35SXin Li return VP8LVectorMismatch(array1, array2, max_limit);
77*b2055c35SXin Li }
78*b2055c35SXin Li
79*b2055c35SXin Li // -----------------------------------------------------------------------------
80*b2055c35SXin Li // VP8LBackwardRefs
81*b2055c35SXin Li
82*b2055c35SXin Li struct PixOrCopyBlock {
83*b2055c35SXin Li PixOrCopyBlock* next_; // next block (or NULL)
84*b2055c35SXin Li PixOrCopy* start_; // data start
85*b2055c35SXin Li int size_; // currently used size
86*b2055c35SXin Li };
87*b2055c35SXin Li
88*b2055c35SXin Li extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
VP8LClearBackwardRefs(VP8LBackwardRefs * const refs)89*b2055c35SXin Li void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {
90*b2055c35SXin Li assert(refs != NULL);
91*b2055c35SXin Li if (refs->tail_ != NULL) {
92*b2055c35SXin Li *refs->tail_ = refs->free_blocks_; // recycle all blocks at once
93*b2055c35SXin Li }
94*b2055c35SXin Li refs->free_blocks_ = refs->refs_;
95*b2055c35SXin Li refs->tail_ = &refs->refs_;
96*b2055c35SXin Li refs->last_block_ = NULL;
97*b2055c35SXin Li refs->refs_ = NULL;
98*b2055c35SXin Li }
99*b2055c35SXin Li
VP8LBackwardRefsClear(VP8LBackwardRefs * const refs)100*b2055c35SXin Li void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
101*b2055c35SXin Li assert(refs != NULL);
102*b2055c35SXin Li VP8LClearBackwardRefs(refs);
103*b2055c35SXin Li while (refs->free_blocks_ != NULL) {
104*b2055c35SXin Li PixOrCopyBlock* const next = refs->free_blocks_->next_;
105*b2055c35SXin Li WebPSafeFree(refs->free_blocks_);
106*b2055c35SXin Li refs->free_blocks_ = next;
107*b2055c35SXin Li }
108*b2055c35SXin Li }
109*b2055c35SXin Li
110*b2055c35SXin Li // Swaps the content of two VP8LBackwardRefs.
BackwardRefsSwap(VP8LBackwardRefs * const refs1,VP8LBackwardRefs * const refs2)111*b2055c35SXin Li static void BackwardRefsSwap(VP8LBackwardRefs* const refs1,
112*b2055c35SXin Li VP8LBackwardRefs* const refs2) {
113*b2055c35SXin Li const int point_to_refs1 =
114*b2055c35SXin Li (refs1->tail_ != NULL && refs1->tail_ == &refs1->refs_);
115*b2055c35SXin Li const int point_to_refs2 =
116*b2055c35SXin Li (refs2->tail_ != NULL && refs2->tail_ == &refs2->refs_);
117*b2055c35SXin Li const VP8LBackwardRefs tmp = *refs1;
118*b2055c35SXin Li *refs1 = *refs2;
119*b2055c35SXin Li *refs2 = tmp;
120*b2055c35SXin Li if (point_to_refs2) refs1->tail_ = &refs1->refs_;
121*b2055c35SXin Li if (point_to_refs1) refs2->tail_ = &refs2->refs_;
122*b2055c35SXin Li }
123*b2055c35SXin Li
VP8LBackwardRefsInit(VP8LBackwardRefs * const refs,int block_size)124*b2055c35SXin Li void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
125*b2055c35SXin Li assert(refs != NULL);
126*b2055c35SXin Li memset(refs, 0, sizeof(*refs));
127*b2055c35SXin Li refs->tail_ = &refs->refs_;
128*b2055c35SXin Li refs->block_size_ =
129*b2055c35SXin Li (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
130*b2055c35SXin Li }
131*b2055c35SXin Li
VP8LRefsCursorInit(const VP8LBackwardRefs * const refs)132*b2055c35SXin Li VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
133*b2055c35SXin Li VP8LRefsCursor c;
134*b2055c35SXin Li c.cur_block_ = refs->refs_;
135*b2055c35SXin Li if (refs->refs_ != NULL) {
136*b2055c35SXin Li c.cur_pos = c.cur_block_->start_;
137*b2055c35SXin Li c.last_pos_ = c.cur_pos + c.cur_block_->size_;
138*b2055c35SXin Li } else {
139*b2055c35SXin Li c.cur_pos = NULL;
140*b2055c35SXin Li c.last_pos_ = NULL;
141*b2055c35SXin Li }
142*b2055c35SXin Li return c;
143*b2055c35SXin Li }
144*b2055c35SXin Li
VP8LRefsCursorNextBlock(VP8LRefsCursor * const c)145*b2055c35SXin Li void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
146*b2055c35SXin Li PixOrCopyBlock* const b = c->cur_block_->next_;
147*b2055c35SXin Li c->cur_pos = (b == NULL) ? NULL : b->start_;
148*b2055c35SXin Li c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
149*b2055c35SXin Li c->cur_block_ = b;
150*b2055c35SXin Li }
151*b2055c35SXin Li
152*b2055c35SXin Li // Create a new block, either from the free list or allocated
BackwardRefsNewBlock(VP8LBackwardRefs * const refs)153*b2055c35SXin Li static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
154*b2055c35SXin Li PixOrCopyBlock* b = refs->free_blocks_;
155*b2055c35SXin Li if (b == NULL) { // allocate new memory chunk
156*b2055c35SXin Li const size_t total_size =
157*b2055c35SXin Li sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
158*b2055c35SXin Li b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
159*b2055c35SXin Li if (b == NULL) {
160*b2055c35SXin Li refs->error_ |= 1;
161*b2055c35SXin Li return NULL;
162*b2055c35SXin Li }
163*b2055c35SXin Li b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b)); // not always aligned
164*b2055c35SXin Li } else { // recycle from free-list
165*b2055c35SXin Li refs->free_blocks_ = b->next_;
166*b2055c35SXin Li }
167*b2055c35SXin Li *refs->tail_ = b;
168*b2055c35SXin Li refs->tail_ = &b->next_;
169*b2055c35SXin Li refs->last_block_ = b;
170*b2055c35SXin Li b->next_ = NULL;
171*b2055c35SXin Li b->size_ = 0;
172*b2055c35SXin Li return b;
173*b2055c35SXin Li }
174*b2055c35SXin Li
175*b2055c35SXin Li // Return 1 on success, 0 on error.
BackwardRefsClone(const VP8LBackwardRefs * const from,VP8LBackwardRefs * const to)176*b2055c35SXin Li static int BackwardRefsClone(const VP8LBackwardRefs* const from,
177*b2055c35SXin Li VP8LBackwardRefs* const to) {
178*b2055c35SXin Li const PixOrCopyBlock* block_from = from->refs_;
179*b2055c35SXin Li VP8LClearBackwardRefs(to);
180*b2055c35SXin Li while (block_from != NULL) {
181*b2055c35SXin Li PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to);
182*b2055c35SXin Li if (block_to == NULL) return 0;
183*b2055c35SXin Li memcpy(block_to->start_, block_from->start_,
184*b2055c35SXin Li block_from->size_ * sizeof(PixOrCopy));
185*b2055c35SXin Li block_to->size_ = block_from->size_;
186*b2055c35SXin Li block_from = block_from->next_;
187*b2055c35SXin Li }
188*b2055c35SXin Li return 1;
189*b2055c35SXin Li }
190*b2055c35SXin Li
191*b2055c35SXin Li extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
192*b2055c35SXin Li const PixOrCopy v);
VP8LBackwardRefsCursorAdd(VP8LBackwardRefs * const refs,const PixOrCopy v)193*b2055c35SXin Li void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
194*b2055c35SXin Li const PixOrCopy v) {
195*b2055c35SXin Li PixOrCopyBlock* b = refs->last_block_;
196*b2055c35SXin Li if (b == NULL || b->size_ == refs->block_size_) {
197*b2055c35SXin Li b = BackwardRefsNewBlock(refs);
198*b2055c35SXin Li if (b == NULL) return; // refs->error_ is set
199*b2055c35SXin Li }
200*b2055c35SXin Li b->start_[b->size_++] = v;
201*b2055c35SXin Li }
202*b2055c35SXin Li
203*b2055c35SXin Li // -----------------------------------------------------------------------------
204*b2055c35SXin Li // Hash chains
205*b2055c35SXin Li
VP8LHashChainInit(VP8LHashChain * const p,int size)206*b2055c35SXin Li int VP8LHashChainInit(VP8LHashChain* const p, int size) {
207*b2055c35SXin Li assert(p->size_ == 0);
208*b2055c35SXin Li assert(p->offset_length_ == NULL);
209*b2055c35SXin Li assert(size > 0);
210*b2055c35SXin Li p->offset_length_ =
211*b2055c35SXin Li (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length_));
212*b2055c35SXin Li if (p->offset_length_ == NULL) return 0;
213*b2055c35SXin Li p->size_ = size;
214*b2055c35SXin Li
215*b2055c35SXin Li return 1;
216*b2055c35SXin Li }
217*b2055c35SXin Li
VP8LHashChainClear(VP8LHashChain * const p)218*b2055c35SXin Li void VP8LHashChainClear(VP8LHashChain* const p) {
219*b2055c35SXin Li assert(p != NULL);
220*b2055c35SXin Li WebPSafeFree(p->offset_length_);
221*b2055c35SXin Li
222*b2055c35SXin Li p->size_ = 0;
223*b2055c35SXin Li p->offset_length_ = NULL;
224*b2055c35SXin Li }
225*b2055c35SXin Li
226*b2055c35SXin Li // -----------------------------------------------------------------------------
227*b2055c35SXin Li
228*b2055c35SXin Li static const uint32_t kHashMultiplierHi = 0xc6a4a793u;
229*b2055c35SXin Li static const uint32_t kHashMultiplierLo = 0x5bd1e996u;
230*b2055c35SXin Li
231*b2055c35SXin Li static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
GetPixPairHash64(const uint32_t * const argb)232*b2055c35SXin Li uint32_t GetPixPairHash64(const uint32_t* const argb) {
233*b2055c35SXin Li uint32_t key;
234*b2055c35SXin Li key = argb[1] * kHashMultiplierHi;
235*b2055c35SXin Li key += argb[0] * kHashMultiplierLo;
236*b2055c35SXin Li key = key >> (32 - HASH_BITS);
237*b2055c35SXin Li return key;
238*b2055c35SXin Li }
239*b2055c35SXin Li
240*b2055c35SXin Li // Returns the maximum number of hash chain lookups to do for a
241*b2055c35SXin Li // given compression quality. Return value in range [8, 86].
GetMaxItersForQuality(int quality)242*b2055c35SXin Li static int GetMaxItersForQuality(int quality) {
243*b2055c35SXin Li return 8 + (quality * quality) / 128;
244*b2055c35SXin Li }
245*b2055c35SXin Li
GetWindowSizeForHashChain(int quality,int xsize)246*b2055c35SXin Li static int GetWindowSizeForHashChain(int quality, int xsize) {
247*b2055c35SXin Li const int max_window_size = (quality > 75) ? WINDOW_SIZE
248*b2055c35SXin Li : (quality > 50) ? (xsize << 8)
249*b2055c35SXin Li : (quality > 25) ? (xsize << 6)
250*b2055c35SXin Li : (xsize << 4);
251*b2055c35SXin Li assert(xsize > 0);
252*b2055c35SXin Li return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size;
253*b2055c35SXin Li }
254*b2055c35SXin Li
MaxFindCopyLength(int len)255*b2055c35SXin Li static WEBP_INLINE int MaxFindCopyLength(int len) {
256*b2055c35SXin Li return (len < MAX_LENGTH) ? len : MAX_LENGTH;
257*b2055c35SXin Li }
258*b2055c35SXin Li
VP8LHashChainFill(VP8LHashChain * const p,int quality,const uint32_t * const argb,int xsize,int ysize,int low_effort,const WebPPicture * const pic,int percent_range,int * const percent)259*b2055c35SXin Li int VP8LHashChainFill(VP8LHashChain* const p, int quality,
260*b2055c35SXin Li const uint32_t* const argb, int xsize, int ysize,
261*b2055c35SXin Li int low_effort, const WebPPicture* const pic,
262*b2055c35SXin Li int percent_range, int* const percent) {
263*b2055c35SXin Li const int size = xsize * ysize;
264*b2055c35SXin Li const int iter_max = GetMaxItersForQuality(quality);
265*b2055c35SXin Li const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
266*b2055c35SXin Li int remaining_percent = percent_range;
267*b2055c35SXin Li int percent_start = *percent;
268*b2055c35SXin Li int pos;
269*b2055c35SXin Li int argb_comp;
270*b2055c35SXin Li uint32_t base_position;
271*b2055c35SXin Li int32_t* hash_to_first_index;
272*b2055c35SXin Li // Temporarily use the p->offset_length_ as a hash chain.
273*b2055c35SXin Li int32_t* chain = (int32_t*)p->offset_length_;
274*b2055c35SXin Li assert(size > 0);
275*b2055c35SXin Li assert(p->size_ != 0);
276*b2055c35SXin Li assert(p->offset_length_ != NULL);
277*b2055c35SXin Li
278*b2055c35SXin Li if (size <= 2) {
279*b2055c35SXin Li p->offset_length_[0] = p->offset_length_[size - 1] = 0;
280*b2055c35SXin Li return 1;
281*b2055c35SXin Li }
282*b2055c35SXin Li
283*b2055c35SXin Li hash_to_first_index =
284*b2055c35SXin Li (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
285*b2055c35SXin Li if (hash_to_first_index == NULL) {
286*b2055c35SXin Li return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
287*b2055c35SXin Li }
288*b2055c35SXin Li
289*b2055c35SXin Li percent_range = remaining_percent / 2;
290*b2055c35SXin Li remaining_percent -= percent_range;
291*b2055c35SXin Li
292*b2055c35SXin Li // Set the int32_t array to -1.
293*b2055c35SXin Li memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));
294*b2055c35SXin Li // Fill the chain linking pixels with the same hash.
295*b2055c35SXin Li argb_comp = (argb[0] == argb[1]);
296*b2055c35SXin Li for (pos = 0; pos < size - 2;) {
297*b2055c35SXin Li uint32_t hash_code;
298*b2055c35SXin Li const int argb_comp_next = (argb[pos + 1] == argb[pos + 2]);
299*b2055c35SXin Li if (argb_comp && argb_comp_next) {
300*b2055c35SXin Li // Consecutive pixels with the same color will share the same hash.
301*b2055c35SXin Li // We therefore use a different hash: the color and its repetition
302*b2055c35SXin Li // length.
303*b2055c35SXin Li uint32_t tmp[2];
304*b2055c35SXin Li uint32_t len = 1;
305*b2055c35SXin Li tmp[0] = argb[pos];
306*b2055c35SXin Li // Figure out how far the pixels are the same.
307*b2055c35SXin Li // The last pixel has a different 64 bit hash, as its next pixel does
308*b2055c35SXin Li // not have the same color, so we just need to get to the last pixel equal
309*b2055c35SXin Li // to its follower.
310*b2055c35SXin Li while (pos + (int)len + 2 < size && argb[pos + len + 2] == argb[pos]) {
311*b2055c35SXin Li ++len;
312*b2055c35SXin Li }
313*b2055c35SXin Li if (len > MAX_LENGTH) {
314*b2055c35SXin Li // Skip the pixels that match for distance=1 and length>MAX_LENGTH
315*b2055c35SXin Li // because they are linked to their predecessor and we automatically
316*b2055c35SXin Li // check that in the main for loop below. Skipping means setting no
317*b2055c35SXin Li // predecessor in the chain, hence -1.
318*b2055c35SXin Li memset(chain + pos, 0xff, (len - MAX_LENGTH) * sizeof(*chain));
319*b2055c35SXin Li pos += len - MAX_LENGTH;
320*b2055c35SXin Li len = MAX_LENGTH;
321*b2055c35SXin Li }
322*b2055c35SXin Li // Process the rest of the hash chain.
323*b2055c35SXin Li while (len) {
324*b2055c35SXin Li tmp[1] = len--;
325*b2055c35SXin Li hash_code = GetPixPairHash64(tmp);
326*b2055c35SXin Li chain[pos] = hash_to_first_index[hash_code];
327*b2055c35SXin Li hash_to_first_index[hash_code] = pos++;
328*b2055c35SXin Li }
329*b2055c35SXin Li argb_comp = 0;
330*b2055c35SXin Li } else {
331*b2055c35SXin Li // Just move one pixel forward.
332*b2055c35SXin Li hash_code = GetPixPairHash64(argb + pos);
333*b2055c35SXin Li chain[pos] = hash_to_first_index[hash_code];
334*b2055c35SXin Li hash_to_first_index[hash_code] = pos++;
335*b2055c35SXin Li argb_comp = argb_comp_next;
336*b2055c35SXin Li }
337*b2055c35SXin Li
338*b2055c35SXin Li if (!WebPReportProgress(
339*b2055c35SXin Li pic, percent_start + percent_range * pos / (size - 2), percent)) {
340*b2055c35SXin Li WebPSafeFree(hash_to_first_index);
341*b2055c35SXin Li return 0;
342*b2055c35SXin Li }
343*b2055c35SXin Li }
344*b2055c35SXin Li // Process the penultimate pixel.
345*b2055c35SXin Li chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];
346*b2055c35SXin Li
347*b2055c35SXin Li WebPSafeFree(hash_to_first_index);
348*b2055c35SXin Li
349*b2055c35SXin Li percent_start += percent_range;
350*b2055c35SXin Li if (!WebPReportProgress(pic, percent_start, percent)) return 0;
351*b2055c35SXin Li percent_range = remaining_percent;
352*b2055c35SXin Li
353*b2055c35SXin Li // Find the best match interval at each pixel, defined by an offset to the
354*b2055c35SXin Li // pixel and a length. The right-most pixel cannot match anything to the right
355*b2055c35SXin Li // (hence a best length of 0) and the left-most pixel nothing to the left
356*b2055c35SXin Li // (hence an offset of 0).
357*b2055c35SXin Li assert(size > 2);
358*b2055c35SXin Li p->offset_length_[0] = p->offset_length_[size - 1] = 0;
359*b2055c35SXin Li for (base_position = size - 2; base_position > 0;) {
360*b2055c35SXin Li const int max_len = MaxFindCopyLength(size - 1 - base_position);
361*b2055c35SXin Li const uint32_t* const argb_start = argb + base_position;
362*b2055c35SXin Li int iter = iter_max;
363*b2055c35SXin Li int best_length = 0;
364*b2055c35SXin Li uint32_t best_distance = 0;
365*b2055c35SXin Li uint32_t best_argb;
366*b2055c35SXin Li const int min_pos =
367*b2055c35SXin Li (base_position > window_size) ? base_position - window_size : 0;
368*b2055c35SXin Li const int length_max = (max_len < 256) ? max_len : 256;
369*b2055c35SXin Li uint32_t max_base_position;
370*b2055c35SXin Li
371*b2055c35SXin Li pos = chain[base_position];
372*b2055c35SXin Li if (!low_effort) {
373*b2055c35SXin Li int curr_length;
374*b2055c35SXin Li // Heuristic: use the comparison with the above line as an initialization.
375*b2055c35SXin Li if (base_position >= (uint32_t)xsize) {
376*b2055c35SXin Li curr_length = FindMatchLength(argb_start - xsize, argb_start,
377*b2055c35SXin Li best_length, max_len);
378*b2055c35SXin Li if (curr_length > best_length) {
379*b2055c35SXin Li best_length = curr_length;
380*b2055c35SXin Li best_distance = xsize;
381*b2055c35SXin Li }
382*b2055c35SXin Li --iter;
383*b2055c35SXin Li }
384*b2055c35SXin Li // Heuristic: compare to the previous pixel.
385*b2055c35SXin Li curr_length =
386*b2055c35SXin Li FindMatchLength(argb_start - 1, argb_start, best_length, max_len);
387*b2055c35SXin Li if (curr_length > best_length) {
388*b2055c35SXin Li best_length = curr_length;
389*b2055c35SXin Li best_distance = 1;
390*b2055c35SXin Li }
391*b2055c35SXin Li --iter;
392*b2055c35SXin Li // Skip the for loop if we already have the maximum.
393*b2055c35SXin Li if (best_length == MAX_LENGTH) pos = min_pos - 1;
394*b2055c35SXin Li }
395*b2055c35SXin Li best_argb = argb_start[best_length];
396*b2055c35SXin Li
397*b2055c35SXin Li for (; pos >= min_pos && --iter; pos = chain[pos]) {
398*b2055c35SXin Li int curr_length;
399*b2055c35SXin Li assert(base_position > (uint32_t)pos);
400*b2055c35SXin Li
401*b2055c35SXin Li if (argb[pos + best_length] != best_argb) continue;
402*b2055c35SXin Li
403*b2055c35SXin Li curr_length = VP8LVectorMismatch(argb + pos, argb_start, max_len);
404*b2055c35SXin Li if (best_length < curr_length) {
405*b2055c35SXin Li best_length = curr_length;
406*b2055c35SXin Li best_distance = base_position - pos;
407*b2055c35SXin Li best_argb = argb_start[best_length];
408*b2055c35SXin Li // Stop if we have reached a good enough length.
409*b2055c35SXin Li if (best_length >= length_max) break;
410*b2055c35SXin Li }
411*b2055c35SXin Li }
412*b2055c35SXin Li // We have the best match but in case the two intervals continue matching
413*b2055c35SXin Li // to the left, we have the best matches for the left-extended pixels.
414*b2055c35SXin Li max_base_position = base_position;
415*b2055c35SXin Li while (1) {
416*b2055c35SXin Li assert(best_length <= MAX_LENGTH);
417*b2055c35SXin Li assert(best_distance <= WINDOW_SIZE);
418*b2055c35SXin Li p->offset_length_[base_position] =
419*b2055c35SXin Li (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length;
420*b2055c35SXin Li --base_position;
421*b2055c35SXin Li // Stop if we don't have a match or if we are out of bounds.
422*b2055c35SXin Li if (best_distance == 0 || base_position == 0) break;
423*b2055c35SXin Li // Stop if we cannot extend the matching intervals to the left.
424*b2055c35SXin Li if (base_position < best_distance ||
425*b2055c35SXin Li argb[base_position - best_distance] != argb[base_position]) {
426*b2055c35SXin Li break;
427*b2055c35SXin Li }
428*b2055c35SXin Li // Stop if we are matching at its limit because there could be a closer
429*b2055c35SXin Li // matching interval with the same maximum length. Then again, if the
430*b2055c35SXin Li // matching interval is as close as possible (best_distance == 1), we will
431*b2055c35SXin Li // never find anything better so let's continue.
432*b2055c35SXin Li if (best_length == MAX_LENGTH && best_distance != 1 &&
433*b2055c35SXin Li base_position + MAX_LENGTH < max_base_position) {
434*b2055c35SXin Li break;
435*b2055c35SXin Li }
436*b2055c35SXin Li if (best_length < MAX_LENGTH) {
437*b2055c35SXin Li ++best_length;
438*b2055c35SXin Li max_base_position = base_position;
439*b2055c35SXin Li }
440*b2055c35SXin Li }
441*b2055c35SXin Li
442*b2055c35SXin Li if (!WebPReportProgress(pic,
443*b2055c35SXin Li percent_start + percent_range *
444*b2055c35SXin Li (size - 2 - base_position) /
445*b2055c35SXin Li (size - 2),
446*b2055c35SXin Li percent)) {
447*b2055c35SXin Li return 0;
448*b2055c35SXin Li }
449*b2055c35SXin Li }
450*b2055c35SXin Li
451*b2055c35SXin Li return WebPReportProgress(pic, percent_start + percent_range, percent);
452*b2055c35SXin Li }
453*b2055c35SXin Li
AddSingleLiteral(uint32_t pixel,int use_color_cache,VP8LColorCache * const hashers,VP8LBackwardRefs * const refs)454*b2055c35SXin Li static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
455*b2055c35SXin Li VP8LColorCache* const hashers,
456*b2055c35SXin Li VP8LBackwardRefs* const refs) {
457*b2055c35SXin Li PixOrCopy v;
458*b2055c35SXin Li if (use_color_cache) {
459*b2055c35SXin Li const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel);
460*b2055c35SXin Li if (VP8LColorCacheLookup(hashers, key) == pixel) {
461*b2055c35SXin Li v = PixOrCopyCreateCacheIdx(key);
462*b2055c35SXin Li } else {
463*b2055c35SXin Li v = PixOrCopyCreateLiteral(pixel);
464*b2055c35SXin Li VP8LColorCacheSet(hashers, key, pixel);
465*b2055c35SXin Li }
466*b2055c35SXin Li } else {
467*b2055c35SXin Li v = PixOrCopyCreateLiteral(pixel);
468*b2055c35SXin Li }
469*b2055c35SXin Li VP8LBackwardRefsCursorAdd(refs, v);
470*b2055c35SXin Li }
471*b2055c35SXin Li
BackwardReferencesRle(int xsize,int ysize,const uint32_t * const argb,int cache_bits,VP8LBackwardRefs * const refs)472*b2055c35SXin Li static int BackwardReferencesRle(int xsize, int ysize,
473*b2055c35SXin Li const uint32_t* const argb,
474*b2055c35SXin Li int cache_bits, VP8LBackwardRefs* const refs) {
475*b2055c35SXin Li const int pix_count = xsize * ysize;
476*b2055c35SXin Li int i, k;
477*b2055c35SXin Li const int use_color_cache = (cache_bits > 0);
478*b2055c35SXin Li VP8LColorCache hashers;
479*b2055c35SXin Li
480*b2055c35SXin Li if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) {
481*b2055c35SXin Li return 0;
482*b2055c35SXin Li }
483*b2055c35SXin Li VP8LClearBackwardRefs(refs);
484*b2055c35SXin Li // Add first pixel as literal.
485*b2055c35SXin Li AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
486*b2055c35SXin Li i = 1;
487*b2055c35SXin Li while (i < pix_count) {
488*b2055c35SXin Li const int max_len = MaxFindCopyLength(pix_count - i);
489*b2055c35SXin Li const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len);
490*b2055c35SXin Li const int prev_row_len = (i < xsize) ? 0 :
491*b2055c35SXin Li FindMatchLength(argb + i, argb + i - xsize, 0, max_len);
492*b2055c35SXin Li if (rle_len >= prev_row_len && rle_len >= MIN_LENGTH) {
493*b2055c35SXin Li VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len));
494*b2055c35SXin Li // We don't need to update the color cache here since it is always the
495*b2055c35SXin Li // same pixel being copied, and that does not change the color cache
496*b2055c35SXin Li // state.
497*b2055c35SXin Li i += rle_len;
498*b2055c35SXin Li } else if (prev_row_len >= MIN_LENGTH) {
499*b2055c35SXin Li VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len));
500*b2055c35SXin Li if (use_color_cache) {
501*b2055c35SXin Li for (k = 0; k < prev_row_len; ++k) {
502*b2055c35SXin Li VP8LColorCacheInsert(&hashers, argb[i + k]);
503*b2055c35SXin Li }
504*b2055c35SXin Li }
505*b2055c35SXin Li i += prev_row_len;
506*b2055c35SXin Li } else {
507*b2055c35SXin Li AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
508*b2055c35SXin Li i++;
509*b2055c35SXin Li }
510*b2055c35SXin Li }
511*b2055c35SXin Li if (use_color_cache) VP8LColorCacheClear(&hashers);
512*b2055c35SXin Li return !refs->error_;
513*b2055c35SXin Li }
514*b2055c35SXin Li
BackwardReferencesLz77(int xsize,int ysize,const uint32_t * const argb,int cache_bits,const VP8LHashChain * const hash_chain,VP8LBackwardRefs * const refs)515*b2055c35SXin Li static int BackwardReferencesLz77(int xsize, int ysize,
516*b2055c35SXin Li const uint32_t* const argb, int cache_bits,
517*b2055c35SXin Li const VP8LHashChain* const hash_chain,
518*b2055c35SXin Li VP8LBackwardRefs* const refs) {
519*b2055c35SXin Li int i;
520*b2055c35SXin Li int i_last_check = -1;
521*b2055c35SXin Li int ok = 0;
522*b2055c35SXin Li int cc_init = 0;
523*b2055c35SXin Li const int use_color_cache = (cache_bits > 0);
524*b2055c35SXin Li const int pix_count = xsize * ysize;
525*b2055c35SXin Li VP8LColorCache hashers;
526*b2055c35SXin Li
527*b2055c35SXin Li if (use_color_cache) {
528*b2055c35SXin Li cc_init = VP8LColorCacheInit(&hashers, cache_bits);
529*b2055c35SXin Li if (!cc_init) goto Error;
530*b2055c35SXin Li }
531*b2055c35SXin Li VP8LClearBackwardRefs(refs);
532*b2055c35SXin Li for (i = 0; i < pix_count;) {
533*b2055c35SXin Li // Alternative#1: Code the pixels starting at 'i' using backward reference.
534*b2055c35SXin Li int offset = 0;
535*b2055c35SXin Li int len = 0;
536*b2055c35SXin Li int j;
537*b2055c35SXin Li VP8LHashChainFindCopy(hash_chain, i, &offset, &len);
538*b2055c35SXin Li if (len >= MIN_LENGTH) {
539*b2055c35SXin Li const int len_ini = len;
540*b2055c35SXin Li int max_reach = 0;
541*b2055c35SXin Li const int j_max =
542*b2055c35SXin Li (i + len_ini >= pix_count) ? pix_count - 1 : i + len_ini;
543*b2055c35SXin Li // Only start from what we have not checked already.
544*b2055c35SXin Li i_last_check = (i > i_last_check) ? i : i_last_check;
545*b2055c35SXin Li // We know the best match for the current pixel but we try to find the
546*b2055c35SXin Li // best matches for the current pixel AND the next one combined.
547*b2055c35SXin Li // The naive method would use the intervals:
548*b2055c35SXin Li // [i,i+len) + [i+len, length of best match at i+len)
549*b2055c35SXin Li // while we check if we can use:
550*b2055c35SXin Li // [i,j) (where j<=i+len) + [j, length of best match at j)
551*b2055c35SXin Li for (j = i_last_check + 1; j <= j_max; ++j) {
552*b2055c35SXin Li const int len_j = VP8LHashChainFindLength(hash_chain, j);
553*b2055c35SXin Li const int reach =
554*b2055c35SXin Li j + (len_j >= MIN_LENGTH ? len_j : 1); // 1 for single literal.
555*b2055c35SXin Li if (reach > max_reach) {
556*b2055c35SXin Li len = j - i;
557*b2055c35SXin Li max_reach = reach;
558*b2055c35SXin Li if (max_reach >= pix_count) break;
559*b2055c35SXin Li }
560*b2055c35SXin Li }
561*b2055c35SXin Li } else {
562*b2055c35SXin Li len = 1;
563*b2055c35SXin Li }
564*b2055c35SXin Li // Go with literal or backward reference.
565*b2055c35SXin Li assert(len > 0);
566*b2055c35SXin Li if (len == 1) {
567*b2055c35SXin Li AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
568*b2055c35SXin Li } else {
569*b2055c35SXin Li VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
570*b2055c35SXin Li if (use_color_cache) {
571*b2055c35SXin Li for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]);
572*b2055c35SXin Li }
573*b2055c35SXin Li }
574*b2055c35SXin Li i += len;
575*b2055c35SXin Li }
576*b2055c35SXin Li
577*b2055c35SXin Li ok = !refs->error_;
578*b2055c35SXin Li Error:
579*b2055c35SXin Li if (cc_init) VP8LColorCacheClear(&hashers);
580*b2055c35SXin Li return ok;
581*b2055c35SXin Li }
582*b2055c35SXin Li
583*b2055c35SXin Li // Compute an LZ77 by forcing matches to happen within a given distance cost.
584*b2055c35SXin Li // We therefore limit the algorithm to the lowest 32 values in the PlaneCode
585*b2055c35SXin Li // definition.
586*b2055c35SXin Li #define WINDOW_OFFSETS_SIZE_MAX 32
BackwardReferencesLz77Box(int xsize,int ysize,const uint32_t * const argb,int cache_bits,const VP8LHashChain * const hash_chain_best,VP8LHashChain * hash_chain,VP8LBackwardRefs * const refs)587*b2055c35SXin Li static int BackwardReferencesLz77Box(int xsize, int ysize,
588*b2055c35SXin Li const uint32_t* const argb, int cache_bits,
589*b2055c35SXin Li const VP8LHashChain* const hash_chain_best,
590*b2055c35SXin Li VP8LHashChain* hash_chain,
591*b2055c35SXin Li VP8LBackwardRefs* const refs) {
592*b2055c35SXin Li int i;
593*b2055c35SXin Li const int pix_count = xsize * ysize;
594*b2055c35SXin Li uint16_t* counts;
595*b2055c35SXin Li int window_offsets[WINDOW_OFFSETS_SIZE_MAX] = {0};
596*b2055c35SXin Li int window_offsets_new[WINDOW_OFFSETS_SIZE_MAX] = {0};
597*b2055c35SXin Li int window_offsets_size = 0;
598*b2055c35SXin Li int window_offsets_new_size = 0;
599*b2055c35SXin Li uint16_t* const counts_ini =
600*b2055c35SXin Li (uint16_t*)WebPSafeMalloc(xsize * ysize, sizeof(*counts_ini));
601*b2055c35SXin Li int best_offset_prev = -1, best_length_prev = -1;
602*b2055c35SXin Li if (counts_ini == NULL) return 0;
603*b2055c35SXin Li
604*b2055c35SXin Li // counts[i] counts how many times a pixel is repeated starting at position i.
605*b2055c35SXin Li i = pix_count - 2;
606*b2055c35SXin Li counts = counts_ini + i;
607*b2055c35SXin Li counts[1] = 1;
608*b2055c35SXin Li for (; i >= 0; --i, --counts) {
609*b2055c35SXin Li if (argb[i] == argb[i + 1]) {
610*b2055c35SXin Li // Max out the counts to MAX_LENGTH.
611*b2055c35SXin Li counts[0] = counts[1] + (counts[1] != MAX_LENGTH);
612*b2055c35SXin Li } else {
613*b2055c35SXin Li counts[0] = 1;
614*b2055c35SXin Li }
615*b2055c35SXin Li }
616*b2055c35SXin Li
617*b2055c35SXin Li // Figure out the window offsets around a pixel. They are stored in a
618*b2055c35SXin Li // spiraling order around the pixel as defined by VP8LDistanceToPlaneCode.
619*b2055c35SXin Li {
620*b2055c35SXin Li int x, y;
621*b2055c35SXin Li for (y = 0; y <= 6; ++y) {
622*b2055c35SXin Li for (x = -6; x <= 6; ++x) {
623*b2055c35SXin Li const int offset = y * xsize + x;
624*b2055c35SXin Li int plane_code;
625*b2055c35SXin Li // Ignore offsets that bring us after the pixel.
626*b2055c35SXin Li if (offset <= 0) continue;
627*b2055c35SXin Li plane_code = VP8LDistanceToPlaneCode(xsize, offset) - 1;
628*b2055c35SXin Li if (plane_code >= WINDOW_OFFSETS_SIZE_MAX) continue;
629*b2055c35SXin Li window_offsets[plane_code] = offset;
630*b2055c35SXin Li }
631*b2055c35SXin Li }
632*b2055c35SXin Li // For narrow images, not all plane codes are reached, so remove those.
633*b2055c35SXin Li for (i = 0; i < WINDOW_OFFSETS_SIZE_MAX; ++i) {
634*b2055c35SXin Li if (window_offsets[i] == 0) continue;
635*b2055c35SXin Li window_offsets[window_offsets_size++] = window_offsets[i];
636*b2055c35SXin Li }
637*b2055c35SXin Li // Given a pixel P, find the offsets that reach pixels unreachable from P-1
638*b2055c35SXin Li // with any of the offsets in window_offsets[].
639*b2055c35SXin Li for (i = 0; i < window_offsets_size; ++i) {
640*b2055c35SXin Li int j;
641*b2055c35SXin Li int is_reachable = 0;
642*b2055c35SXin Li for (j = 0; j < window_offsets_size && !is_reachable; ++j) {
643*b2055c35SXin Li is_reachable |= (window_offsets[i] == window_offsets[j] + 1);
644*b2055c35SXin Li }
645*b2055c35SXin Li if (!is_reachable) {
646*b2055c35SXin Li window_offsets_new[window_offsets_new_size] = window_offsets[i];
647*b2055c35SXin Li ++window_offsets_new_size;
648*b2055c35SXin Li }
649*b2055c35SXin Li }
650*b2055c35SXin Li }
651*b2055c35SXin Li
652*b2055c35SXin Li hash_chain->offset_length_[0] = 0;
653*b2055c35SXin Li for (i = 1; i < pix_count; ++i) {
654*b2055c35SXin Li int ind;
655*b2055c35SXin Li int best_length = VP8LHashChainFindLength(hash_chain_best, i);
656*b2055c35SXin Li int best_offset;
657*b2055c35SXin Li int do_compute = 1;
658*b2055c35SXin Li
659*b2055c35SXin Li if (best_length >= MAX_LENGTH) {
660*b2055c35SXin Li // Do not recompute the best match if we already have a maximal one in the
661*b2055c35SXin Li // window.
662*b2055c35SXin Li best_offset = VP8LHashChainFindOffset(hash_chain_best, i);
663*b2055c35SXin Li for (ind = 0; ind < window_offsets_size; ++ind) {
664*b2055c35SXin Li if (best_offset == window_offsets[ind]) {
665*b2055c35SXin Li do_compute = 0;
666*b2055c35SXin Li break;
667*b2055c35SXin Li }
668*b2055c35SXin Li }
669*b2055c35SXin Li }
670*b2055c35SXin Li if (do_compute) {
671*b2055c35SXin Li // Figure out if we should use the offset/length from the previous pixel
672*b2055c35SXin Li // as an initial guess and therefore only inspect the offsets in
673*b2055c35SXin Li // window_offsets_new[].
674*b2055c35SXin Li const int use_prev =
675*b2055c35SXin Li (best_length_prev > 1) && (best_length_prev < MAX_LENGTH);
676*b2055c35SXin Li const int num_ind =
677*b2055c35SXin Li use_prev ? window_offsets_new_size : window_offsets_size;
678*b2055c35SXin Li best_length = use_prev ? best_length_prev - 1 : 0;
679*b2055c35SXin Li best_offset = use_prev ? best_offset_prev : 0;
680*b2055c35SXin Li // Find the longest match in a window around the pixel.
681*b2055c35SXin Li for (ind = 0; ind < num_ind; ++ind) {
682*b2055c35SXin Li int curr_length = 0;
683*b2055c35SXin Li int j = i;
684*b2055c35SXin Li int j_offset =
685*b2055c35SXin Li use_prev ? i - window_offsets_new[ind] : i - window_offsets[ind];
686*b2055c35SXin Li if (j_offset < 0 || argb[j_offset] != argb[i]) continue;
687*b2055c35SXin Li // The longest match is the sum of how many times each pixel is
688*b2055c35SXin Li // repeated.
689*b2055c35SXin Li do {
690*b2055c35SXin Li const int counts_j_offset = counts_ini[j_offset];
691*b2055c35SXin Li const int counts_j = counts_ini[j];
692*b2055c35SXin Li if (counts_j_offset != counts_j) {
693*b2055c35SXin Li curr_length +=
694*b2055c35SXin Li (counts_j_offset < counts_j) ? counts_j_offset : counts_j;
695*b2055c35SXin Li break;
696*b2055c35SXin Li }
697*b2055c35SXin Li // The same color is repeated counts_pos times at j_offset and j.
698*b2055c35SXin Li curr_length += counts_j_offset;
699*b2055c35SXin Li j_offset += counts_j_offset;
700*b2055c35SXin Li j += counts_j_offset;
701*b2055c35SXin Li } while (curr_length <= MAX_LENGTH && j < pix_count &&
702*b2055c35SXin Li argb[j_offset] == argb[j]);
703*b2055c35SXin Li if (best_length < curr_length) {
704*b2055c35SXin Li best_offset =
705*b2055c35SXin Li use_prev ? window_offsets_new[ind] : window_offsets[ind];
706*b2055c35SXin Li if (curr_length >= MAX_LENGTH) {
707*b2055c35SXin Li best_length = MAX_LENGTH;
708*b2055c35SXin Li break;
709*b2055c35SXin Li } else {
710*b2055c35SXin Li best_length = curr_length;
711*b2055c35SXin Li }
712*b2055c35SXin Li }
713*b2055c35SXin Li }
714*b2055c35SXin Li }
715*b2055c35SXin Li
716*b2055c35SXin Li assert(i + best_length <= pix_count);
717*b2055c35SXin Li assert(best_length <= MAX_LENGTH);
718*b2055c35SXin Li if (best_length <= MIN_LENGTH) {
719*b2055c35SXin Li hash_chain->offset_length_[i] = 0;
720*b2055c35SXin Li best_offset_prev = 0;
721*b2055c35SXin Li best_length_prev = 0;
722*b2055c35SXin Li } else {
723*b2055c35SXin Li hash_chain->offset_length_[i] =
724*b2055c35SXin Li (best_offset << MAX_LENGTH_BITS) | (uint32_t)best_length;
725*b2055c35SXin Li best_offset_prev = best_offset;
726*b2055c35SXin Li best_length_prev = best_length;
727*b2055c35SXin Li }
728*b2055c35SXin Li }
729*b2055c35SXin Li hash_chain->offset_length_[0] = 0;
730*b2055c35SXin Li WebPSafeFree(counts_ini);
731*b2055c35SXin Li
732*b2055c35SXin Li return BackwardReferencesLz77(xsize, ysize, argb, cache_bits, hash_chain,
733*b2055c35SXin Li refs);
734*b2055c35SXin Li }
735*b2055c35SXin Li
736*b2055c35SXin Li // -----------------------------------------------------------------------------
737*b2055c35SXin Li
BackwardReferences2DLocality(int xsize,const VP8LBackwardRefs * const refs)738*b2055c35SXin Li static void BackwardReferences2DLocality(int xsize,
739*b2055c35SXin Li const VP8LBackwardRefs* const refs) {
740*b2055c35SXin Li VP8LRefsCursor c = VP8LRefsCursorInit(refs);
741*b2055c35SXin Li while (VP8LRefsCursorOk(&c)) {
742*b2055c35SXin Li if (PixOrCopyIsCopy(c.cur_pos)) {
743*b2055c35SXin Li const int dist = c.cur_pos->argb_or_distance;
744*b2055c35SXin Li const int transformed_dist = VP8LDistanceToPlaneCode(xsize, dist);
745*b2055c35SXin Li c.cur_pos->argb_or_distance = transformed_dist;
746*b2055c35SXin Li }
747*b2055c35SXin Li VP8LRefsCursorNext(&c);
748*b2055c35SXin Li }
749*b2055c35SXin Li }
750*b2055c35SXin Li
751*b2055c35SXin Li // Evaluate optimal cache bits for the local color cache.
752*b2055c35SXin Li // The input *best_cache_bits sets the maximum cache bits to use (passing 0
753*b2055c35SXin Li // implies disabling the local color cache). The local color cache is also
754*b2055c35SXin Li // disabled for the lower (<= 25) quality.
755*b2055c35SXin Li // Returns 0 in case of memory error.
CalculateBestCacheSize(const uint32_t * argb,int quality,const VP8LBackwardRefs * const refs,int * const best_cache_bits)756*b2055c35SXin Li static int CalculateBestCacheSize(const uint32_t* argb, int quality,
757*b2055c35SXin Li const VP8LBackwardRefs* const refs,
758*b2055c35SXin Li int* const best_cache_bits) {
759*b2055c35SXin Li int i;
760*b2055c35SXin Li const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
761*b2055c35SXin Li float entropy_min = MAX_ENTROPY;
762*b2055c35SXin Li int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
763*b2055c35SXin Li VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
764*b2055c35SXin Li VP8LRefsCursor c = VP8LRefsCursorInit(refs);
765*b2055c35SXin Li VP8LHistogram* histos[MAX_COLOR_CACHE_BITS + 1] = { NULL };
766*b2055c35SXin Li int ok = 0;
767*b2055c35SXin Li
768*b2055c35SXin Li assert(cache_bits_max >= 0 && cache_bits_max <= MAX_COLOR_CACHE_BITS);
769*b2055c35SXin Li
770*b2055c35SXin Li if (cache_bits_max == 0) {
771*b2055c35SXin Li *best_cache_bits = 0;
772*b2055c35SXin Li // Local color cache is disabled.
773*b2055c35SXin Li return 1;
774*b2055c35SXin Li }
775*b2055c35SXin Li
776*b2055c35SXin Li // Allocate data.
777*b2055c35SXin Li for (i = 0; i <= cache_bits_max; ++i) {
778*b2055c35SXin Li histos[i] = VP8LAllocateHistogram(i);
779*b2055c35SXin Li if (histos[i] == NULL) goto Error;
780*b2055c35SXin Li VP8LHistogramInit(histos[i], i, /*init_arrays=*/ 1);
781*b2055c35SXin Li if (i == 0) continue;
782*b2055c35SXin Li cc_init[i] = VP8LColorCacheInit(&hashers[i], i);
783*b2055c35SXin Li if (!cc_init[i]) goto Error;
784*b2055c35SXin Li }
785*b2055c35SXin Li
786*b2055c35SXin Li // Find the cache_bits giving the lowest entropy. The search is done in a
787*b2055c35SXin Li // brute-force way as the function (entropy w.r.t cache_bits) can be
788*b2055c35SXin Li // anything in practice.
789*b2055c35SXin Li while (VP8LRefsCursorOk(&c)) {
790*b2055c35SXin Li const PixOrCopy* const v = c.cur_pos;
791*b2055c35SXin Li if (PixOrCopyIsLiteral(v)) {
792*b2055c35SXin Li const uint32_t pix = *argb++;
793*b2055c35SXin Li const uint32_t a = (pix >> 24) & 0xff;
794*b2055c35SXin Li const uint32_t r = (pix >> 16) & 0xff;
795*b2055c35SXin Li const uint32_t g = (pix >> 8) & 0xff;
796*b2055c35SXin Li const uint32_t b = (pix >> 0) & 0xff;
797*b2055c35SXin Li // The keys of the caches can be derived from the longest one.
798*b2055c35SXin Li int key = VP8LHashPix(pix, 32 - cache_bits_max);
799*b2055c35SXin Li // Do not use the color cache for cache_bits = 0.
800*b2055c35SXin Li ++histos[0]->blue_[b];
801*b2055c35SXin Li ++histos[0]->literal_[g];
802*b2055c35SXin Li ++histos[0]->red_[r];
803*b2055c35SXin Li ++histos[0]->alpha_[a];
804*b2055c35SXin Li // Deal with cache_bits > 0.
805*b2055c35SXin Li for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
806*b2055c35SXin Li if (VP8LColorCacheLookup(&hashers[i], key) == pix) {
807*b2055c35SXin Li ++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
808*b2055c35SXin Li } else {
809*b2055c35SXin Li VP8LColorCacheSet(&hashers[i], key, pix);
810*b2055c35SXin Li ++histos[i]->blue_[b];
811*b2055c35SXin Li ++histos[i]->literal_[g];
812*b2055c35SXin Li ++histos[i]->red_[r];
813*b2055c35SXin Li ++histos[i]->alpha_[a];
814*b2055c35SXin Li }
815*b2055c35SXin Li }
816*b2055c35SXin Li } else {
817*b2055c35SXin Li int code, extra_bits, extra_bits_value;
818*b2055c35SXin Li // We should compute the contribution of the (distance,length)
819*b2055c35SXin Li // histograms but those are the same independently from the cache size.
820*b2055c35SXin Li // As those constant contributions are in the end added to the other
821*b2055c35SXin Li // histogram contributions, we can ignore them, except for the length
822*b2055c35SXin Li // prefix that is part of the literal_ histogram.
823*b2055c35SXin Li int len = PixOrCopyLength(v);
824*b2055c35SXin Li uint32_t argb_prev = *argb ^ 0xffffffffu;
825*b2055c35SXin Li VP8LPrefixEncode(len, &code, &extra_bits, &extra_bits_value);
826*b2055c35SXin Li for (i = 0; i <= cache_bits_max; ++i) {
827*b2055c35SXin Li ++histos[i]->literal_[NUM_LITERAL_CODES + code];
828*b2055c35SXin Li }
829*b2055c35SXin Li // Update the color caches.
830*b2055c35SXin Li do {
831*b2055c35SXin Li if (*argb != argb_prev) {
832*b2055c35SXin Li // Efficiency: insert only if the color changes.
833*b2055c35SXin Li int key = VP8LHashPix(*argb, 32 - cache_bits_max);
834*b2055c35SXin Li for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
835*b2055c35SXin Li hashers[i].colors_[key] = *argb;
836*b2055c35SXin Li }
837*b2055c35SXin Li argb_prev = *argb;
838*b2055c35SXin Li }
839*b2055c35SXin Li argb++;
840*b2055c35SXin Li } while (--len != 0);
841*b2055c35SXin Li }
842*b2055c35SXin Li VP8LRefsCursorNext(&c);
843*b2055c35SXin Li }
844*b2055c35SXin Li
845*b2055c35SXin Li for (i = 0; i <= cache_bits_max; ++i) {
846*b2055c35SXin Li const float entropy = VP8LHistogramEstimateBits(histos[i]);
847*b2055c35SXin Li if (i == 0 || entropy < entropy_min) {
848*b2055c35SXin Li entropy_min = entropy;
849*b2055c35SXin Li *best_cache_bits = i;
850*b2055c35SXin Li }
851*b2055c35SXin Li }
852*b2055c35SXin Li ok = 1;
853*b2055c35SXin Li Error:
854*b2055c35SXin Li for (i = 0; i <= cache_bits_max; ++i) {
855*b2055c35SXin Li if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
856*b2055c35SXin Li VP8LFreeHistogram(histos[i]);
857*b2055c35SXin Li }
858*b2055c35SXin Li return ok;
859*b2055c35SXin Li }
860*b2055c35SXin Li
861*b2055c35SXin Li // Update (in-place) backward references for specified cache_bits.
BackwardRefsWithLocalCache(const uint32_t * const argb,int cache_bits,VP8LBackwardRefs * const refs)862*b2055c35SXin Li static int BackwardRefsWithLocalCache(const uint32_t* const argb,
863*b2055c35SXin Li int cache_bits,
864*b2055c35SXin Li VP8LBackwardRefs* const refs) {
865*b2055c35SXin Li int pixel_index = 0;
866*b2055c35SXin Li VP8LColorCache hashers;
867*b2055c35SXin Li VP8LRefsCursor c = VP8LRefsCursorInit(refs);
868*b2055c35SXin Li if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
869*b2055c35SXin Li
870*b2055c35SXin Li while (VP8LRefsCursorOk(&c)) {
871*b2055c35SXin Li PixOrCopy* const v = c.cur_pos;
872*b2055c35SXin Li if (PixOrCopyIsLiteral(v)) {
873*b2055c35SXin Li const uint32_t argb_literal = v->argb_or_distance;
874*b2055c35SXin Li const int ix = VP8LColorCacheContains(&hashers, argb_literal);
875*b2055c35SXin Li if (ix >= 0) {
876*b2055c35SXin Li // hashers contains argb_literal
877*b2055c35SXin Li *v = PixOrCopyCreateCacheIdx(ix);
878*b2055c35SXin Li } else {
879*b2055c35SXin Li VP8LColorCacheInsert(&hashers, argb_literal);
880*b2055c35SXin Li }
881*b2055c35SXin Li ++pixel_index;
882*b2055c35SXin Li } else {
883*b2055c35SXin Li // refs was created without local cache, so it can not have cache indexes.
884*b2055c35SXin Li int k;
885*b2055c35SXin Li assert(PixOrCopyIsCopy(v));
886*b2055c35SXin Li for (k = 0; k < v->len; ++k) {
887*b2055c35SXin Li VP8LColorCacheInsert(&hashers, argb[pixel_index++]);
888*b2055c35SXin Li }
889*b2055c35SXin Li }
890*b2055c35SXin Li VP8LRefsCursorNext(&c);
891*b2055c35SXin Li }
892*b2055c35SXin Li VP8LColorCacheClear(&hashers);
893*b2055c35SXin Li return 1;
894*b2055c35SXin Li }
895*b2055c35SXin Li
GetBackwardReferencesLowEffort(int width,int height,const uint32_t * const argb,int * const cache_bits,const VP8LHashChain * const hash_chain,VP8LBackwardRefs * const refs_lz77)896*b2055c35SXin Li static VP8LBackwardRefs* GetBackwardReferencesLowEffort(
897*b2055c35SXin Li int width, int height, const uint32_t* const argb,
898*b2055c35SXin Li int* const cache_bits, const VP8LHashChain* const hash_chain,
899*b2055c35SXin Li VP8LBackwardRefs* const refs_lz77) {
900*b2055c35SXin Li *cache_bits = 0;
901*b2055c35SXin Li if (!BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77)) {
902*b2055c35SXin Li return NULL;
903*b2055c35SXin Li }
904*b2055c35SXin Li BackwardReferences2DLocality(width, refs_lz77);
905*b2055c35SXin Li return refs_lz77;
906*b2055c35SXin Li }
907*b2055c35SXin Li
908*b2055c35SXin Li extern int VP8LBackwardReferencesTraceBackwards(
909*b2055c35SXin Li int xsize, int ysize, const uint32_t* const argb, int cache_bits,
910*b2055c35SXin Li const VP8LHashChain* const hash_chain,
911*b2055c35SXin Li const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst);
GetBackwardReferences(int width,int height,const uint32_t * const argb,int quality,int lz77_types_to_try,int cache_bits_max,int do_no_cache,const VP8LHashChain * const hash_chain,VP8LBackwardRefs * const refs,int * const cache_bits_best)912*b2055c35SXin Li static int GetBackwardReferences(int width, int height,
913*b2055c35SXin Li const uint32_t* const argb, int quality,
914*b2055c35SXin Li int lz77_types_to_try, int cache_bits_max,
915*b2055c35SXin Li int do_no_cache,
916*b2055c35SXin Li const VP8LHashChain* const hash_chain,
917*b2055c35SXin Li VP8LBackwardRefs* const refs,
918*b2055c35SXin Li int* const cache_bits_best) {
919*b2055c35SXin Li VP8LHistogram* histo = NULL;
920*b2055c35SXin Li int i, lz77_type;
921*b2055c35SXin Li // Index 0 is for a color cache, index 1 for no cache (if needed).
922*b2055c35SXin Li int lz77_types_best[2] = {0, 0};
923*b2055c35SXin Li float bit_costs_best[2] = {FLT_MAX, FLT_MAX};
924*b2055c35SXin Li VP8LHashChain hash_chain_box;
925*b2055c35SXin Li VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
926*b2055c35SXin Li int status = 0;
927*b2055c35SXin Li memset(&hash_chain_box, 0, sizeof(hash_chain_box));
928*b2055c35SXin Li
929*b2055c35SXin Li histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS);
930*b2055c35SXin Li if (histo == NULL) goto Error;
931*b2055c35SXin Li
932*b2055c35SXin Li for (lz77_type = 1; lz77_types_to_try;
933*b2055c35SXin Li lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
934*b2055c35SXin Li int res = 0;
935*b2055c35SXin Li float bit_cost = 0.f;
936*b2055c35SXin Li if ((lz77_types_to_try & lz77_type) == 0) continue;
937*b2055c35SXin Li switch (lz77_type) {
938*b2055c35SXin Li case kLZ77RLE:
939*b2055c35SXin Li res = BackwardReferencesRle(width, height, argb, 0, refs_tmp);
940*b2055c35SXin Li break;
941*b2055c35SXin Li case kLZ77Standard:
942*b2055c35SXin Li // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color
943*b2055c35SXin Li // cache is not that different in practice.
944*b2055c35SXin Li res = BackwardReferencesLz77(width, height, argb, 0, hash_chain,
945*b2055c35SXin Li refs_tmp);
946*b2055c35SXin Li break;
947*b2055c35SXin Li case kLZ77Box:
948*b2055c35SXin Li if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error;
949*b2055c35SXin Li res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain,
950*b2055c35SXin Li &hash_chain_box, refs_tmp);
951*b2055c35SXin Li break;
952*b2055c35SXin Li default:
953*b2055c35SXin Li assert(0);
954*b2055c35SXin Li }
955*b2055c35SXin Li if (!res) goto Error;
956*b2055c35SXin Li
957*b2055c35SXin Li // Start with the no color cache case.
958*b2055c35SXin Li for (i = 1; i >= 0; --i) {
959*b2055c35SXin Li int cache_bits = (i == 1) ? 0 : cache_bits_max;
960*b2055c35SXin Li
961*b2055c35SXin Li if (i == 1 && !do_no_cache) continue;
962*b2055c35SXin Li
963*b2055c35SXin Li if (i == 0) {
964*b2055c35SXin Li // Try with a color cache.
965*b2055c35SXin Li if (!CalculateBestCacheSize(argb, quality, refs_tmp, &cache_bits)) {
966*b2055c35SXin Li goto Error;
967*b2055c35SXin Li }
968*b2055c35SXin Li if (cache_bits > 0) {
969*b2055c35SXin Li if (!BackwardRefsWithLocalCache(argb, cache_bits, refs_tmp)) {
970*b2055c35SXin Li goto Error;
971*b2055c35SXin Li }
972*b2055c35SXin Li }
973*b2055c35SXin Li }
974*b2055c35SXin Li
975*b2055c35SXin Li if (i == 0 && do_no_cache && cache_bits == 0) {
976*b2055c35SXin Li // No need to re-compute bit_cost as it was computed at i == 1.
977*b2055c35SXin Li } else {
978*b2055c35SXin Li VP8LHistogramCreate(histo, refs_tmp, cache_bits);
979*b2055c35SXin Li bit_cost = VP8LHistogramEstimateBits(histo);
980*b2055c35SXin Li }
981*b2055c35SXin Li
982*b2055c35SXin Li if (bit_cost < bit_costs_best[i]) {
983*b2055c35SXin Li if (i == 1) {
984*b2055c35SXin Li // Do not swap as the full cache analysis would have the wrong
985*b2055c35SXin Li // VP8LBackwardRefs to start with.
986*b2055c35SXin Li if (!BackwardRefsClone(refs_tmp, &refs[1])) goto Error;
987*b2055c35SXin Li } else {
988*b2055c35SXin Li BackwardRefsSwap(refs_tmp, &refs[0]);
989*b2055c35SXin Li }
990*b2055c35SXin Li bit_costs_best[i] = bit_cost;
991*b2055c35SXin Li lz77_types_best[i] = lz77_type;
992*b2055c35SXin Li if (i == 0) *cache_bits_best = cache_bits;
993*b2055c35SXin Li }
994*b2055c35SXin Li }
995*b2055c35SXin Li }
996*b2055c35SXin Li assert(lz77_types_best[0] > 0);
997*b2055c35SXin Li assert(!do_no_cache || lz77_types_best[1] > 0);
998*b2055c35SXin Li
999*b2055c35SXin Li // Improve on simple LZ77 but only for high quality (TraceBackwards is
1000*b2055c35SXin Li // costly).
1001*b2055c35SXin Li for (i = 1; i >= 0; --i) {
1002*b2055c35SXin Li if (i == 1 && !do_no_cache) continue;
1003*b2055c35SXin Li if ((lz77_types_best[i] == kLZ77Standard ||
1004*b2055c35SXin Li lz77_types_best[i] == kLZ77Box) &&
1005*b2055c35SXin Li quality >= 25) {
1006*b2055c35SXin Li const VP8LHashChain* const hash_chain_tmp =
1007*b2055c35SXin Li (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
1008*b2055c35SXin Li const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
1009*b2055c35SXin Li float bit_cost_trace;
1010*b2055c35SXin Li if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
1011*b2055c35SXin Li hash_chain_tmp, &refs[i],
1012*b2055c35SXin Li refs_tmp)) {
1013*b2055c35SXin Li goto Error;
1014*b2055c35SXin Li }
1015*b2055c35SXin Li VP8LHistogramCreate(histo, refs_tmp, cache_bits);
1016*b2055c35SXin Li bit_cost_trace = VP8LHistogramEstimateBits(histo);
1017*b2055c35SXin Li if (bit_cost_trace < bit_costs_best[i]) {
1018*b2055c35SXin Li BackwardRefsSwap(refs_tmp, &refs[i]);
1019*b2055c35SXin Li }
1020*b2055c35SXin Li }
1021*b2055c35SXin Li
1022*b2055c35SXin Li BackwardReferences2DLocality(width, &refs[i]);
1023*b2055c35SXin Li
1024*b2055c35SXin Li if (i == 1 && lz77_types_best[0] == lz77_types_best[1] &&
1025*b2055c35SXin Li *cache_bits_best == 0) {
1026*b2055c35SXin Li // If the best cache size is 0 and we have the same best LZ77, just copy
1027*b2055c35SXin Li // the data over and stop here.
1028*b2055c35SXin Li if (!BackwardRefsClone(&refs[1], &refs[0])) goto Error;
1029*b2055c35SXin Li break;
1030*b2055c35SXin Li }
1031*b2055c35SXin Li }
1032*b2055c35SXin Li status = 1;
1033*b2055c35SXin Li
1034*b2055c35SXin Li Error:
1035*b2055c35SXin Li VP8LHashChainClear(&hash_chain_box);
1036*b2055c35SXin Li VP8LFreeHistogram(histo);
1037*b2055c35SXin Li return status;
1038*b2055c35SXin Li }
1039*b2055c35SXin Li
VP8LGetBackwardReferences(int width,int height,const uint32_t * const argb,int quality,int low_effort,int lz77_types_to_try,int cache_bits_max,int do_no_cache,const VP8LHashChain * const hash_chain,VP8LBackwardRefs * const refs,int * const cache_bits_best,const WebPPicture * const pic,int percent_range,int * const percent)1040*b2055c35SXin Li int VP8LGetBackwardReferences(
1041*b2055c35SXin Li int width, int height, const uint32_t* const argb, int quality,
1042*b2055c35SXin Li int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
1043*b2055c35SXin Li const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
1044*b2055c35SXin Li int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
1045*b2055c35SXin Li int* const percent) {
1046*b2055c35SXin Li if (low_effort) {
1047*b2055c35SXin Li VP8LBackwardRefs* refs_best;
1048*b2055c35SXin Li *cache_bits_best = cache_bits_max;
1049*b2055c35SXin Li refs_best = GetBackwardReferencesLowEffort(
1050*b2055c35SXin Li width, height, argb, cache_bits_best, hash_chain, refs);
1051*b2055c35SXin Li if (refs_best == NULL) {
1052*b2055c35SXin Li return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
1053*b2055c35SXin Li }
1054*b2055c35SXin Li // Set it in first position.
1055*b2055c35SXin Li BackwardRefsSwap(refs_best, &refs[0]);
1056*b2055c35SXin Li } else {
1057*b2055c35SXin Li if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
1058*b2055c35SXin Li cache_bits_max, do_no_cache, hash_chain, refs,
1059*b2055c35SXin Li cache_bits_best)) {
1060*b2055c35SXin Li return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
1061*b2055c35SXin Li }
1062*b2055c35SXin Li }
1063*b2055c35SXin Li
1064*b2055c35SXin Li return WebPReportProgress(pic, *percent + percent_range, percent);
1065*b2055c35SXin Li }
1066