1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3 * Copyright (C), 2022, Coolpad Group Limited.
4 * Created by Yue Hu <[email protected]>
5 */
6 #ifndef _LARGEFILE_SOURCE
7 #define _LARGEFILE_SOURCE
8 #endif
9 #ifndef _LARGEFILE64_SOURCE
10 #define _LARGEFILE64_SOURCE
11 #endif
12 #ifndef _FILE_OFFSET_BITS
13 #define _FILE_OFFSET_BITS 64
14 #endif
15 #ifndef _GNU_SOURCE
16 #define _GNU_SOURCE
17 #endif
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <sys/mman.h>
21 #include "erofs/err.h"
22 #include "erofs/inode.h"
23 #include "erofs/compress.h"
24 #include "erofs/print.h"
25 #include "erofs/internal.h"
26 #include "erofs/fragments.h"
27
28 struct erofs_fragment_dedupe_item {
29 struct list_head list;
30 unsigned int length;
31 erofs_off_t pos;
32 u8 data[];
33 };
34
35 #define EROFS_TOF_HASHLEN 16
36
37 #define FRAGMENT_HASHSIZE 65536
38 #define FRAGMENT_HASH(c) ((c) & (FRAGMENT_HASHSIZE - 1))
39
40 static struct list_head dupli_frags[FRAGMENT_HASHSIZE];
41 static FILE *packedfile;
42 const char *erofs_frags_packedname = "packed_file";
43
44 #ifndef HAVE_LSEEK64
45 #define erofs_lseek64 lseek
46 #else
47 #define erofs_lseek64 lseek64
48 #endif
49
z_erofs_fragments_dedupe_find(struct erofs_inode * inode,int fd,u32 crc)50 static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
51 u32 crc)
52 {
53 struct erofs_fragment_dedupe_item *cur, *di = NULL;
54 struct list_head *head;
55 u8 *data;
56 unsigned int length, e2, deduped;
57 erofs_off_t pos;
58 int ret;
59
60 head = &dupli_frags[FRAGMENT_HASH(crc)];
61 if (list_empty(head))
62 return 0;
63
64 /* XXX: no need to read so much for smaller? */
65 if (inode->i_size < EROFS_CONFIG_COMPR_MAX_SZ)
66 length = inode->i_size;
67 else
68 length = EROFS_CONFIG_COMPR_MAX_SZ;
69
70 data = malloc(length);
71 if (!data)
72 return -ENOMEM;
73
74 if (erofs_lseek64(fd, inode->i_size - length, SEEK_SET) < 0) {
75 ret = -errno;
76 goto out;
77 }
78
79 ret = read(fd, data, length);
80 if (ret != length) {
81 ret = -errno;
82 goto out;
83 }
84
85 DBG_BUGON(length <= EROFS_TOF_HASHLEN);
86 e2 = length - EROFS_TOF_HASHLEN;
87 deduped = 0;
88
89 list_for_each_entry(cur, head, list) {
90 unsigned int e1, mn, i = 0;
91
92 DBG_BUGON(cur->length <= EROFS_TOF_HASHLEN);
93 e1 = cur->length - EROFS_TOF_HASHLEN;
94
95 if (memcmp(cur->data + e1, data + e2, EROFS_TOF_HASHLEN))
96 continue;
97
98 mn = min(e1, e2);
99 while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1])
100 ++i;
101
102 if (!di || i + EROFS_TOF_HASHLEN > deduped) {
103 deduped = i + EROFS_TOF_HASHLEN;
104 di = cur;
105
106 /* full match */
107 if (i == e2)
108 break;
109 }
110 }
111 if (!di)
112 goto out;
113
114 DBG_BUGON(di->length < deduped);
115 pos = di->pos + di->length - deduped;
116 /* let's read more to dedupe as long as we can */
117 if (deduped == di->length) {
118 fflush(packedfile);
119
120 while(deduped < inode->i_size && pos) {
121 char buf[2][16384];
122 unsigned int sz = min_t(unsigned int, pos,
123 sizeof(buf[0]));
124
125 if (pread(fileno(packedfile), buf[0], sz,
126 pos - sz) != sz)
127 break;
128 if (pread(fd, buf[1], sz,
129 inode->i_size - deduped - sz) != sz)
130 break;
131
132 if (memcmp(buf[0], buf[1], sz))
133 break;
134 pos -= sz;
135 deduped += sz;
136 }
137 }
138 inode->fragment_size = deduped;
139 inode->fragmentoff = pos;
140
141 erofs_dbg("Dedupe %u tail data at %llu", inode->fragment_size,
142 inode->fragmentoff | 0ULL);
143 out:
144 free(data);
145 return ret;
146 }
147
z_erofs_fragments_dedupe(struct erofs_inode * inode,int fd,u32 * tofcrc)148 int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc)
149 {
150 u8 data_to_hash[EROFS_TOF_HASHLEN];
151 int ret;
152
153 if (inode->i_size <= EROFS_TOF_HASHLEN)
154 return 0;
155
156 if (erofs_lseek64(fd, inode->i_size - EROFS_TOF_HASHLEN, SEEK_SET) < 0)
157 return -errno;
158
159 ret = read(fd, data_to_hash, EROFS_TOF_HASHLEN);
160 if (ret != EROFS_TOF_HASHLEN)
161 return -errno;
162
163 *tofcrc = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN);
164 ret = z_erofs_fragments_dedupe_find(inode, fd, *tofcrc);
165 if (ret < 0)
166 return ret;
167 ret = lseek(fd, 0, SEEK_SET);
168 if (ret < 0)
169 return -errno;
170 return 0;
171 }
172
z_erofs_fragments_dedupe_insert(void * data,unsigned int len,erofs_off_t pos,u32 crc)173 static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len,
174 erofs_off_t pos, u32 crc)
175 {
176 struct erofs_fragment_dedupe_item *di;
177
178 if (len <= EROFS_TOF_HASHLEN)
179 return 0;
180 if (len > EROFS_CONFIG_COMPR_MAX_SZ) {
181 data += len - EROFS_CONFIG_COMPR_MAX_SZ;
182 pos += len - EROFS_CONFIG_COMPR_MAX_SZ;
183 len = EROFS_CONFIG_COMPR_MAX_SZ;
184 }
185 di = malloc(sizeof(*di) + len);
186 if (!di)
187 return -ENOMEM;
188
189 memcpy(di->data, data, len);
190 di->length = len;
191 di->pos = pos;
192
193 list_add_tail(&di->list, &dupli_frags[FRAGMENT_HASH(crc)]);
194 return 0;
195 }
196
z_erofs_fragments_init(void)197 int z_erofs_fragments_init(void)
198 {
199 unsigned int i;
200
201 for (i = 0; i < FRAGMENT_HASHSIZE; ++i)
202 init_list_head(&dupli_frags[i]);
203 return 0;
204 }
205
z_erofs_fragments_exit(void)206 void z_erofs_fragments_exit(void)
207 {
208 struct erofs_fragment_dedupe_item *di, *n;
209 struct list_head *head;
210 unsigned int i;
211
212 for (i = 0; i < FRAGMENT_HASHSIZE; ++i) {
213 head = &dupli_frags[i];
214
215 list_for_each_entry_safe(di, n, head, list)
216 free(di);
217 }
218 }
219
z_erofs_fragments_commit(struct erofs_inode * inode)220 void z_erofs_fragments_commit(struct erofs_inode *inode)
221 {
222 if (!inode->fragment_size)
223 return;
224 /*
225 * If the packed inode is larger than 4GiB, the full fragmentoff
226 * will be recorded by switching to the noncompact layout anyway.
227 */
228 if (inode->fragmentoff >> 32)
229 inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
230
231 inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
232 erofs_sb_set_fragments(inode->sbi);
233 }
234
z_erofs_pack_file_from_fd(struct erofs_inode * inode,int fd,u32 tofcrc)235 int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd,
236 u32 tofcrc)
237 {
238 #ifdef HAVE_FTELLO64
239 off64_t offset = ftello64(packedfile);
240 #else
241 off_t offset = ftello(packedfile);
242 #endif
243 char *memblock;
244 int rc;
245
246 if (offset < 0)
247 return -errno;
248
249 inode->fragmentoff = (erofs_off_t)offset;
250 inode->fragment_size = inode->i_size;
251
252 memblock = mmap(NULL, inode->i_size, PROT_READ, MAP_SHARED, fd, 0);
253 if (memblock == MAP_FAILED || !memblock) {
254 unsigned long long remaining = inode->fragment_size;
255
256 memblock = NULL;
257 while (remaining) {
258 char buf[32768];
259 unsigned int sz = min_t(unsigned int, remaining,
260 sizeof(buf));
261
262 rc = read(fd, buf, sz);
263 if (rc != sz) {
264 if (rc < 0)
265 rc = -errno;
266 else
267 rc = -EAGAIN;
268 goto out;
269 }
270 if (fwrite(buf, sz, 1, packedfile) != 1) {
271 rc = -EIO;
272 goto out;
273 }
274 remaining -= sz;
275 }
276 rc = lseek(fd, 0, SEEK_SET);
277 if (rc < 0) {
278 rc = -errno;
279 goto out;
280 }
281 } else if (fwrite(memblock, inode->fragment_size, 1, packedfile) != 1) {
282 rc = -EIO;
283 goto out;
284 }
285
286 erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
287 inode->fragmentoff);
288
289 if (memblock)
290 rc = z_erofs_fragments_dedupe_insert(memblock,
291 inode->fragment_size, inode->fragmentoff, tofcrc);
292 else
293 rc = 0;
294 out:
295 if (memblock)
296 munmap(memblock, inode->i_size);
297 return rc;
298 }
299
z_erofs_pack_fragments(struct erofs_inode * inode,void * data,unsigned int len,u32 tofcrc)300 int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
301 unsigned int len, u32 tofcrc)
302 {
303 #ifdef HAVE_FTELLO64
304 off64_t offset = ftello64(packedfile);
305 #else
306 off_t offset = ftello(packedfile);
307 #endif
308 int ret;
309
310 if (offset < 0)
311 return -errno;
312
313 inode->fragmentoff = (erofs_off_t)offset;
314 inode->fragment_size = len;
315
316 if (fwrite(data, len, 1, packedfile) != 1)
317 return -EIO;
318
319 erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
320 inode->fragmentoff);
321
322 ret = z_erofs_fragments_dedupe_insert(data, len, inode->fragmentoff,
323 tofcrc);
324 if (ret)
325 return ret;
326 return len;
327 }
328
erofs_flush_packed_inode(struct erofs_sb_info * sbi)329 int erofs_flush_packed_inode(struct erofs_sb_info *sbi)
330 {
331 struct erofs_inode *inode;
332
333 if (!erofs_sb_has_fragments(sbi))
334 return -EINVAL;
335 fflush(packedfile);
336 if (!ftello(packedfile))
337 return 0;
338
339 inode = erofs_mkfs_build_special_from_fd(sbi, fileno(packedfile),
340 EROFS_PACKED_INODE);
341 sbi->packed_nid = erofs_lookupnid(inode);
342 erofs_iput(inode);
343 return 0;
344 }
345
erofs_packedfile_exit(void)346 void erofs_packedfile_exit(void)
347 {
348 if (packedfile)
349 fclose(packedfile);
350 }
351
erofs_packedfile_init(void)352 FILE *erofs_packedfile_init(void)
353 {
354 #ifdef HAVE_TMPFILE64
355 packedfile = tmpfile64();
356 #else
357 packedfile = tmpfile();
358 #endif
359 if (!packedfile)
360 return ERR_PTR(-ENOMEM);
361 return packedfile;
362 }
363