1*33b1fccfSAndroid Build Coastguard Worker // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2*33b1fccfSAndroid Build Coastguard Worker /*
3*33b1fccfSAndroid Build Coastguard Worker * Copyright (C) 2022 Alibaba Cloud
4*33b1fccfSAndroid Build Coastguard Worker */
5*33b1fccfSAndroid Build Coastguard Worker #include <stdlib.h>
6*33b1fccfSAndroid Build Coastguard Worker #include "erofs/dedupe.h"
7*33b1fccfSAndroid Build Coastguard Worker #include "erofs/print.h"
8*33b1fccfSAndroid Build Coastguard Worker #include "rolling_hash.h"
9*33b1fccfSAndroid Build Coastguard Worker #include "xxhash.h"
10*33b1fccfSAndroid Build Coastguard Worker #include "sha256.h"
11*33b1fccfSAndroid Build Coastguard Worker
erofs_memcmp2(const u8 * s1,const u8 * s2,unsigned long sz)12*33b1fccfSAndroid Build Coastguard Worker unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
13*33b1fccfSAndroid Build Coastguard Worker unsigned long sz)
14*33b1fccfSAndroid Build Coastguard Worker {
15*33b1fccfSAndroid Build Coastguard Worker const unsigned long *a1, *a2;
16*33b1fccfSAndroid Build Coastguard Worker unsigned long n = sz;
17*33b1fccfSAndroid Build Coastguard Worker
18*33b1fccfSAndroid Build Coastguard Worker if (sz < sizeof(long))
19*33b1fccfSAndroid Build Coastguard Worker goto out_bytes;
20*33b1fccfSAndroid Build Coastguard Worker
21*33b1fccfSAndroid Build Coastguard Worker if (((long)s1 & (sizeof(long) - 1)) ==
22*33b1fccfSAndroid Build Coastguard Worker ((long)s2 & (sizeof(long) - 1))) {
23*33b1fccfSAndroid Build Coastguard Worker while ((long)s1 & (sizeof(long) - 1)) {
24*33b1fccfSAndroid Build Coastguard Worker if (*s1 != *s2)
25*33b1fccfSAndroid Build Coastguard Worker break;
26*33b1fccfSAndroid Build Coastguard Worker ++s1;
27*33b1fccfSAndroid Build Coastguard Worker ++s2;
28*33b1fccfSAndroid Build Coastguard Worker --sz;
29*33b1fccfSAndroid Build Coastguard Worker }
30*33b1fccfSAndroid Build Coastguard Worker
31*33b1fccfSAndroid Build Coastguard Worker a1 = (const unsigned long *)s1;
32*33b1fccfSAndroid Build Coastguard Worker a2 = (const unsigned long *)s2;
33*33b1fccfSAndroid Build Coastguard Worker while (sz >= sizeof(long)) {
34*33b1fccfSAndroid Build Coastguard Worker if (*a1 != *a2)
35*33b1fccfSAndroid Build Coastguard Worker break;
36*33b1fccfSAndroid Build Coastguard Worker ++a1;
37*33b1fccfSAndroid Build Coastguard Worker ++a2;
38*33b1fccfSAndroid Build Coastguard Worker sz -= sizeof(long);
39*33b1fccfSAndroid Build Coastguard Worker }
40*33b1fccfSAndroid Build Coastguard Worker } else {
41*33b1fccfSAndroid Build Coastguard Worker a1 = (const unsigned long *)s1;
42*33b1fccfSAndroid Build Coastguard Worker a2 = (const unsigned long *)s2;
43*33b1fccfSAndroid Build Coastguard Worker do {
44*33b1fccfSAndroid Build Coastguard Worker if (get_unaligned(a1) != get_unaligned(a2))
45*33b1fccfSAndroid Build Coastguard Worker break;
46*33b1fccfSAndroid Build Coastguard Worker ++a1;
47*33b1fccfSAndroid Build Coastguard Worker ++a2;
48*33b1fccfSAndroid Build Coastguard Worker sz -= sizeof(long);
49*33b1fccfSAndroid Build Coastguard Worker } while (sz >= sizeof(long));
50*33b1fccfSAndroid Build Coastguard Worker }
51*33b1fccfSAndroid Build Coastguard Worker s1 = (const u8 *)a1;
52*33b1fccfSAndroid Build Coastguard Worker s2 = (const u8 *)a2;
53*33b1fccfSAndroid Build Coastguard Worker out_bytes:
54*33b1fccfSAndroid Build Coastguard Worker while (sz) {
55*33b1fccfSAndroid Build Coastguard Worker if (*s1 != *s2)
56*33b1fccfSAndroid Build Coastguard Worker break;
57*33b1fccfSAndroid Build Coastguard Worker ++s1;
58*33b1fccfSAndroid Build Coastguard Worker ++s2;
59*33b1fccfSAndroid Build Coastguard Worker --sz;
60*33b1fccfSAndroid Build Coastguard Worker }
61*33b1fccfSAndroid Build Coastguard Worker return n - sz;
62*33b1fccfSAndroid Build Coastguard Worker }
63*33b1fccfSAndroid Build Coastguard Worker
64*33b1fccfSAndroid Build Coastguard Worker static unsigned int window_size, rollinghash_rm;
65*33b1fccfSAndroid Build Coastguard Worker static struct list_head dedupe_tree[65536];
66*33b1fccfSAndroid Build Coastguard Worker struct z_erofs_dedupe_item *dedupe_subtree;
67*33b1fccfSAndroid Build Coastguard Worker
68*33b1fccfSAndroid Build Coastguard Worker struct z_erofs_dedupe_item {
69*33b1fccfSAndroid Build Coastguard Worker struct list_head list;
70*33b1fccfSAndroid Build Coastguard Worker struct z_erofs_dedupe_item *chain;
71*33b1fccfSAndroid Build Coastguard Worker long long hash;
72*33b1fccfSAndroid Build Coastguard Worker u8 prefix_sha256[32];
73*33b1fccfSAndroid Build Coastguard Worker u64 prefix_xxh64;
74*33b1fccfSAndroid Build Coastguard Worker
75*33b1fccfSAndroid Build Coastguard Worker erofs_blk_t compressed_blkaddr;
76*33b1fccfSAndroid Build Coastguard Worker unsigned int compressed_blks;
77*33b1fccfSAndroid Build Coastguard Worker
78*33b1fccfSAndroid Build Coastguard Worker int original_length;
79*33b1fccfSAndroid Build Coastguard Worker bool partial, raw;
80*33b1fccfSAndroid Build Coastguard Worker u8 extra_data[];
81*33b1fccfSAndroid Build Coastguard Worker };
82*33b1fccfSAndroid Build Coastguard Worker
z_erofs_dedupe_match(struct z_erofs_dedupe_ctx * ctx)83*33b1fccfSAndroid Build Coastguard Worker int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
84*33b1fccfSAndroid Build Coastguard Worker {
85*33b1fccfSAndroid Build Coastguard Worker struct z_erofs_dedupe_item e_find;
86*33b1fccfSAndroid Build Coastguard Worker u8 *cur;
87*33b1fccfSAndroid Build Coastguard Worker bool initial = true;
88*33b1fccfSAndroid Build Coastguard Worker
89*33b1fccfSAndroid Build Coastguard Worker if (!window_size)
90*33b1fccfSAndroid Build Coastguard Worker return -ENOENT;
91*33b1fccfSAndroid Build Coastguard Worker
92*33b1fccfSAndroid Build Coastguard Worker if (ctx->cur > ctx->end - window_size)
93*33b1fccfSAndroid Build Coastguard Worker cur = ctx->end - window_size;
94*33b1fccfSAndroid Build Coastguard Worker else
95*33b1fccfSAndroid Build Coastguard Worker cur = ctx->cur;
96*33b1fccfSAndroid Build Coastguard Worker
97*33b1fccfSAndroid Build Coastguard Worker /* move backward byte-by-byte */
98*33b1fccfSAndroid Build Coastguard Worker for (; cur >= ctx->start; --cur) {
99*33b1fccfSAndroid Build Coastguard Worker struct list_head *p;
100*33b1fccfSAndroid Build Coastguard Worker struct z_erofs_dedupe_item *e;
101*33b1fccfSAndroid Build Coastguard Worker
102*33b1fccfSAndroid Build Coastguard Worker unsigned int extra = 0;
103*33b1fccfSAndroid Build Coastguard Worker u64 xxh64_csum = 0;
104*33b1fccfSAndroid Build Coastguard Worker u8 sha256[32];
105*33b1fccfSAndroid Build Coastguard Worker
106*33b1fccfSAndroid Build Coastguard Worker if (initial) {
107*33b1fccfSAndroid Build Coastguard Worker /* initial try */
108*33b1fccfSAndroid Build Coastguard Worker e_find.hash = erofs_rolling_hash_init(cur, window_size, true);
109*33b1fccfSAndroid Build Coastguard Worker initial = false;
110*33b1fccfSAndroid Build Coastguard Worker } else {
111*33b1fccfSAndroid Build Coastguard Worker e_find.hash = erofs_rolling_hash_advance(e_find.hash,
112*33b1fccfSAndroid Build Coastguard Worker rollinghash_rm, cur[window_size], cur[0]);
113*33b1fccfSAndroid Build Coastguard Worker }
114*33b1fccfSAndroid Build Coastguard Worker
115*33b1fccfSAndroid Build Coastguard Worker p = &dedupe_tree[e_find.hash & (ARRAY_SIZE(dedupe_tree) - 1)];
116*33b1fccfSAndroid Build Coastguard Worker list_for_each_entry(e, p, list) {
117*33b1fccfSAndroid Build Coastguard Worker if (e->hash != e_find.hash)
118*33b1fccfSAndroid Build Coastguard Worker continue;
119*33b1fccfSAndroid Build Coastguard Worker if (!extra) {
120*33b1fccfSAndroid Build Coastguard Worker xxh64_csum = xxh64(cur, window_size, 0);
121*33b1fccfSAndroid Build Coastguard Worker extra = 1;
122*33b1fccfSAndroid Build Coastguard Worker }
123*33b1fccfSAndroid Build Coastguard Worker if (e->prefix_xxh64 == xxh64_csum)
124*33b1fccfSAndroid Build Coastguard Worker break;
125*33b1fccfSAndroid Build Coastguard Worker }
126*33b1fccfSAndroid Build Coastguard Worker
127*33b1fccfSAndroid Build Coastguard Worker if (&e->list == p)
128*33b1fccfSAndroid Build Coastguard Worker continue;
129*33b1fccfSAndroid Build Coastguard Worker
130*33b1fccfSAndroid Build Coastguard Worker erofs_sha256(cur, window_size, sha256);
131*33b1fccfSAndroid Build Coastguard Worker if (memcmp(sha256, e->prefix_sha256, sizeof(sha256)))
132*33b1fccfSAndroid Build Coastguard Worker continue;
133*33b1fccfSAndroid Build Coastguard Worker
134*33b1fccfSAndroid Build Coastguard Worker extra = min_t(unsigned int, ctx->end - cur - window_size,
135*33b1fccfSAndroid Build Coastguard Worker e->original_length - window_size);
136*33b1fccfSAndroid Build Coastguard Worker extra = erofs_memcmp2(cur + window_size, e->extra_data, extra);
137*33b1fccfSAndroid Build Coastguard Worker if (window_size + extra <= ctx->cur - cur)
138*33b1fccfSAndroid Build Coastguard Worker continue;
139*33b1fccfSAndroid Build Coastguard Worker ctx->cur = cur;
140*33b1fccfSAndroid Build Coastguard Worker ctx->e.length = window_size + extra;
141*33b1fccfSAndroid Build Coastguard Worker ctx->e.partial = e->partial ||
142*33b1fccfSAndroid Build Coastguard Worker (window_size + extra < e->original_length);
143*33b1fccfSAndroid Build Coastguard Worker ctx->e.raw = e->raw;
144*33b1fccfSAndroid Build Coastguard Worker ctx->e.inlined = false;
145*33b1fccfSAndroid Build Coastguard Worker ctx->e.blkaddr = e->compressed_blkaddr;
146*33b1fccfSAndroid Build Coastguard Worker ctx->e.compressedblks = e->compressed_blks;
147*33b1fccfSAndroid Build Coastguard Worker return 0;
148*33b1fccfSAndroid Build Coastguard Worker }
149*33b1fccfSAndroid Build Coastguard Worker return -ENOENT;
150*33b1fccfSAndroid Build Coastguard Worker }
151*33b1fccfSAndroid Build Coastguard Worker
z_erofs_dedupe_insert(struct z_erofs_inmem_extent * e,void * original_data)152*33b1fccfSAndroid Build Coastguard Worker int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
153*33b1fccfSAndroid Build Coastguard Worker void *original_data)
154*33b1fccfSAndroid Build Coastguard Worker {
155*33b1fccfSAndroid Build Coastguard Worker struct list_head *p;
156*33b1fccfSAndroid Build Coastguard Worker struct z_erofs_dedupe_item *di, *k;
157*33b1fccfSAndroid Build Coastguard Worker
158*33b1fccfSAndroid Build Coastguard Worker if (!window_size || e->length < window_size)
159*33b1fccfSAndroid Build Coastguard Worker return 0;
160*33b1fccfSAndroid Build Coastguard Worker
161*33b1fccfSAndroid Build Coastguard Worker di = malloc(sizeof(*di) + e->length - window_size);
162*33b1fccfSAndroid Build Coastguard Worker if (!di)
163*33b1fccfSAndroid Build Coastguard Worker return -ENOMEM;
164*33b1fccfSAndroid Build Coastguard Worker
165*33b1fccfSAndroid Build Coastguard Worker di->original_length = e->length;
166*33b1fccfSAndroid Build Coastguard Worker erofs_sha256(original_data, window_size, di->prefix_sha256);
167*33b1fccfSAndroid Build Coastguard Worker
168*33b1fccfSAndroid Build Coastguard Worker di->prefix_xxh64 = xxh64(original_data, window_size, 0);
169*33b1fccfSAndroid Build Coastguard Worker di->hash = erofs_rolling_hash_init(original_data,
170*33b1fccfSAndroid Build Coastguard Worker window_size, true);
171*33b1fccfSAndroid Build Coastguard Worker memcpy(di->extra_data, original_data + window_size,
172*33b1fccfSAndroid Build Coastguard Worker e->length - window_size);
173*33b1fccfSAndroid Build Coastguard Worker di->compressed_blkaddr = e->blkaddr;
174*33b1fccfSAndroid Build Coastguard Worker di->compressed_blks = e->compressedblks;
175*33b1fccfSAndroid Build Coastguard Worker di->partial = e->partial;
176*33b1fccfSAndroid Build Coastguard Worker di->raw = e->raw;
177*33b1fccfSAndroid Build Coastguard Worker
178*33b1fccfSAndroid Build Coastguard Worker /* skip the same xxh64 hash */
179*33b1fccfSAndroid Build Coastguard Worker p = &dedupe_tree[di->hash & (ARRAY_SIZE(dedupe_tree) - 1)];
180*33b1fccfSAndroid Build Coastguard Worker list_for_each_entry(k, p, list) {
181*33b1fccfSAndroid Build Coastguard Worker if (k->prefix_xxh64 == di->prefix_xxh64) {
182*33b1fccfSAndroid Build Coastguard Worker free(di);
183*33b1fccfSAndroid Build Coastguard Worker return 0;
184*33b1fccfSAndroid Build Coastguard Worker }
185*33b1fccfSAndroid Build Coastguard Worker }
186*33b1fccfSAndroid Build Coastguard Worker di->chain = dedupe_subtree;
187*33b1fccfSAndroid Build Coastguard Worker dedupe_subtree = di;
188*33b1fccfSAndroid Build Coastguard Worker list_add_tail(&di->list, p);
189*33b1fccfSAndroid Build Coastguard Worker return 0;
190*33b1fccfSAndroid Build Coastguard Worker }
191*33b1fccfSAndroid Build Coastguard Worker
z_erofs_dedupe_commit(bool drop)192*33b1fccfSAndroid Build Coastguard Worker void z_erofs_dedupe_commit(bool drop)
193*33b1fccfSAndroid Build Coastguard Worker {
194*33b1fccfSAndroid Build Coastguard Worker if (!dedupe_subtree)
195*33b1fccfSAndroid Build Coastguard Worker return;
196*33b1fccfSAndroid Build Coastguard Worker if (drop) {
197*33b1fccfSAndroid Build Coastguard Worker struct z_erofs_dedupe_item *di, *n;
198*33b1fccfSAndroid Build Coastguard Worker
199*33b1fccfSAndroid Build Coastguard Worker for (di = dedupe_subtree; di; di = n) {
200*33b1fccfSAndroid Build Coastguard Worker n = di->chain;
201*33b1fccfSAndroid Build Coastguard Worker list_del(&di->list);
202*33b1fccfSAndroid Build Coastguard Worker free(di);
203*33b1fccfSAndroid Build Coastguard Worker }
204*33b1fccfSAndroid Build Coastguard Worker }
205*33b1fccfSAndroid Build Coastguard Worker dedupe_subtree = NULL;
206*33b1fccfSAndroid Build Coastguard Worker }
207*33b1fccfSAndroid Build Coastguard Worker
z_erofs_dedupe_init(unsigned int wsiz)208*33b1fccfSAndroid Build Coastguard Worker int z_erofs_dedupe_init(unsigned int wsiz)
209*33b1fccfSAndroid Build Coastguard Worker {
210*33b1fccfSAndroid Build Coastguard Worker struct list_head *p;
211*33b1fccfSAndroid Build Coastguard Worker
212*33b1fccfSAndroid Build Coastguard Worker for (p = dedupe_tree;
213*33b1fccfSAndroid Build Coastguard Worker p < dedupe_tree + ARRAY_SIZE(dedupe_tree); ++p)
214*33b1fccfSAndroid Build Coastguard Worker init_list_head(p);
215*33b1fccfSAndroid Build Coastguard Worker
216*33b1fccfSAndroid Build Coastguard Worker window_size = wsiz;
217*33b1fccfSAndroid Build Coastguard Worker rollinghash_rm = erofs_rollinghash_calc_rm(window_size);
218*33b1fccfSAndroid Build Coastguard Worker return 0;
219*33b1fccfSAndroid Build Coastguard Worker }
220*33b1fccfSAndroid Build Coastguard Worker
z_erofs_dedupe_exit(void)221*33b1fccfSAndroid Build Coastguard Worker void z_erofs_dedupe_exit(void)
222*33b1fccfSAndroid Build Coastguard Worker {
223*33b1fccfSAndroid Build Coastguard Worker struct z_erofs_dedupe_item *di, *n;
224*33b1fccfSAndroid Build Coastguard Worker struct list_head *p;
225*33b1fccfSAndroid Build Coastguard Worker
226*33b1fccfSAndroid Build Coastguard Worker if (!window_size)
227*33b1fccfSAndroid Build Coastguard Worker return;
228*33b1fccfSAndroid Build Coastguard Worker
229*33b1fccfSAndroid Build Coastguard Worker z_erofs_dedupe_commit(true);
230*33b1fccfSAndroid Build Coastguard Worker
231*33b1fccfSAndroid Build Coastguard Worker for (p = dedupe_tree;
232*33b1fccfSAndroid Build Coastguard Worker p < dedupe_tree + ARRAY_SIZE(dedupe_tree); ++p) {
233*33b1fccfSAndroid Build Coastguard Worker list_for_each_entry_safe(di, n, p, list) {
234*33b1fccfSAndroid Build Coastguard Worker list_del(&di->list);
235*33b1fccfSAndroid Build Coastguard Worker free(di);
236*33b1fccfSAndroid Build Coastguard Worker }
237*33b1fccfSAndroid Build Coastguard Worker }
238*33b1fccfSAndroid Build Coastguard Worker dedupe_subtree = NULL;
239*33b1fccfSAndroid Build Coastguard Worker }
240