xref: /aosp_15_r20/external/erofs-utils/lib/fragments.c (revision 33b1fccf6a0fada2c2875d400ed01119b7676ee5)
1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3  * Copyright (C), 2022, Coolpad Group Limited.
4  * Created by Yue Hu <[email protected]>
5  */
6 #ifndef _LARGEFILE_SOURCE
7 #define _LARGEFILE_SOURCE
8 #endif
9 #ifndef _LARGEFILE64_SOURCE
10 #define _LARGEFILE64_SOURCE
11 #endif
12 #ifndef _FILE_OFFSET_BITS
13 #define _FILE_OFFSET_BITS 64
14 #endif
15 #ifndef _GNU_SOURCE
16 #define _GNU_SOURCE
17 #endif
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <sys/mman.h>
21 #include "erofs/err.h"
22 #include "erofs/inode.h"
23 #include "erofs/compress.h"
24 #include "erofs/print.h"
25 #include "erofs/internal.h"
26 #include "erofs/fragments.h"
27 
28 struct erofs_fragment_dedupe_item {
29 	struct list_head	list;
30 	unsigned int		length;
31 	erofs_off_t		pos;
32 	u8			data[];
33 };
34 
35 #define EROFS_TOF_HASHLEN		16
36 
37 #define FRAGMENT_HASHSIZE		65536
38 #define FRAGMENT_HASH(c)		((c) & (FRAGMENT_HASHSIZE - 1))
39 
40 static struct list_head dupli_frags[FRAGMENT_HASHSIZE];
41 static FILE *packedfile;
42 const char *erofs_frags_packedname = "packed_file";
43 
44 #ifndef HAVE_LSEEK64
45 #define erofs_lseek64 lseek
46 #else
47 #define erofs_lseek64 lseek64
48 #endif
49 
z_erofs_fragments_dedupe_find(struct erofs_inode * inode,int fd,u32 crc)50 static int z_erofs_fragments_dedupe_find(struct erofs_inode *inode, int fd,
51 					 u32 crc)
52 {
53 	struct erofs_fragment_dedupe_item *cur, *di = NULL;
54 	struct list_head *head;
55 	u8 *data;
56 	unsigned int length, e2, deduped;
57 	erofs_off_t pos;
58 	int ret;
59 
60 	head = &dupli_frags[FRAGMENT_HASH(crc)];
61 	if (list_empty(head))
62 		return 0;
63 
64 	/* XXX: no need to read so much for smaller? */
65 	if (inode->i_size < EROFS_CONFIG_COMPR_MAX_SZ)
66 		length = inode->i_size;
67 	else
68 		length = EROFS_CONFIG_COMPR_MAX_SZ;
69 
70 	data = malloc(length);
71 	if (!data)
72 		return -ENOMEM;
73 
74 	if (erofs_lseek64(fd, inode->i_size - length, SEEK_SET) < 0) {
75 		ret = -errno;
76 		goto out;
77 	}
78 
79 	ret = read(fd, data, length);
80 	if (ret != length) {
81 		ret = -errno;
82 		goto out;
83 	}
84 
85 	DBG_BUGON(length <= EROFS_TOF_HASHLEN);
86 	e2 = length - EROFS_TOF_HASHLEN;
87 	deduped = 0;
88 
89 	list_for_each_entry(cur, head, list) {
90 		unsigned int e1, mn, i = 0;
91 
92 		DBG_BUGON(cur->length <= EROFS_TOF_HASHLEN);
93 		e1 = cur->length - EROFS_TOF_HASHLEN;
94 
95 		if (memcmp(cur->data + e1, data + e2, EROFS_TOF_HASHLEN))
96 			continue;
97 
98 		mn = min(e1, e2);
99 		while (i < mn && cur->data[e1 - i - 1] == data[e2 - i - 1])
100 			++i;
101 
102 		if (!di || i + EROFS_TOF_HASHLEN > deduped) {
103 			deduped = i + EROFS_TOF_HASHLEN;
104 			di = cur;
105 
106 			/* full match */
107 			if (i == e2)
108 				break;
109 		}
110 	}
111 	if (!di)
112 		goto out;
113 
114 	DBG_BUGON(di->length < deduped);
115 	pos = di->pos + di->length - deduped;
116 	/* let's read more to dedupe as long as we can */
117 	if (deduped == di->length) {
118 		fflush(packedfile);
119 
120 		while(deduped < inode->i_size && pos) {
121 			char buf[2][16384];
122 			unsigned int sz = min_t(unsigned int, pos,
123 						sizeof(buf[0]));
124 
125 			if (pread(fileno(packedfile), buf[0], sz,
126 				  pos - sz) != sz)
127 				break;
128 			if (pread(fd, buf[1], sz,
129 				  inode->i_size - deduped - sz) != sz)
130 				break;
131 
132 			if (memcmp(buf[0], buf[1], sz))
133 				break;
134 			pos -= sz;
135 			deduped += sz;
136 		}
137 	}
138 	inode->fragment_size = deduped;
139 	inode->fragmentoff = pos;
140 
141 	erofs_dbg("Dedupe %u tail data at %llu", inode->fragment_size,
142 		  inode->fragmentoff | 0ULL);
143 out:
144 	free(data);
145 	return ret;
146 }
147 
z_erofs_fragments_dedupe(struct erofs_inode * inode,int fd,u32 * tofcrc)148 int z_erofs_fragments_dedupe(struct erofs_inode *inode, int fd, u32 *tofcrc)
149 {
150 	u8 data_to_hash[EROFS_TOF_HASHLEN];
151 	int ret;
152 
153 	if (inode->i_size <= EROFS_TOF_HASHLEN)
154 		return 0;
155 
156 	if (erofs_lseek64(fd, inode->i_size - EROFS_TOF_HASHLEN, SEEK_SET) < 0)
157 		return -errno;
158 
159 	ret = read(fd, data_to_hash, EROFS_TOF_HASHLEN);
160 	if (ret != EROFS_TOF_HASHLEN)
161 		return -errno;
162 
163 	*tofcrc = erofs_crc32c(~0, data_to_hash, EROFS_TOF_HASHLEN);
164 	ret = z_erofs_fragments_dedupe_find(inode, fd, *tofcrc);
165 	if (ret < 0)
166 		return ret;
167 	ret = lseek(fd, 0, SEEK_SET);
168 	if (ret < 0)
169 		return -errno;
170 	return 0;
171 }
172 
z_erofs_fragments_dedupe_insert(void * data,unsigned int len,erofs_off_t pos,u32 crc)173 static int z_erofs_fragments_dedupe_insert(void *data, unsigned int len,
174 					   erofs_off_t pos, u32 crc)
175 {
176 	struct erofs_fragment_dedupe_item *di;
177 
178 	if (len <= EROFS_TOF_HASHLEN)
179 		return 0;
180 	if (len > EROFS_CONFIG_COMPR_MAX_SZ) {
181 		data += len - EROFS_CONFIG_COMPR_MAX_SZ;
182 		pos += len - EROFS_CONFIG_COMPR_MAX_SZ;
183 		len = EROFS_CONFIG_COMPR_MAX_SZ;
184 	}
185 	di = malloc(sizeof(*di) + len);
186 	if (!di)
187 		return -ENOMEM;
188 
189 	memcpy(di->data, data, len);
190 	di->length = len;
191 	di->pos = pos;
192 
193 	list_add_tail(&di->list, &dupli_frags[FRAGMENT_HASH(crc)]);
194 	return 0;
195 }
196 
z_erofs_fragments_init(void)197 int z_erofs_fragments_init(void)
198 {
199 	unsigned int i;
200 
201 	for (i = 0; i < FRAGMENT_HASHSIZE; ++i)
202 		init_list_head(&dupli_frags[i]);
203 	return 0;
204 }
205 
z_erofs_fragments_exit(void)206 void z_erofs_fragments_exit(void)
207 {
208 	struct erofs_fragment_dedupe_item *di, *n;
209 	struct list_head *head;
210 	unsigned int i;
211 
212 	for (i = 0; i < FRAGMENT_HASHSIZE; ++i) {
213 		head = &dupli_frags[i];
214 
215 		list_for_each_entry_safe(di, n, head, list)
216 			free(di);
217 	}
218 }
219 
z_erofs_fragments_commit(struct erofs_inode * inode)220 void z_erofs_fragments_commit(struct erofs_inode *inode)
221 {
222 	if (!inode->fragment_size)
223 		return;
224 	/*
225 	 * If the packed inode is larger than 4GiB, the full fragmentoff
226 	 * will be recorded by switching to the noncompact layout anyway.
227 	 */
228 	if (inode->fragmentoff >> 32)
229 		inode->datalayout = EROFS_INODE_COMPRESSED_FULL;
230 
231 	inode->z_advise |= Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
232 	erofs_sb_set_fragments(inode->sbi);
233 }
234 
z_erofs_pack_file_from_fd(struct erofs_inode * inode,int fd,u32 tofcrc)235 int z_erofs_pack_file_from_fd(struct erofs_inode *inode, int fd,
236 			      u32 tofcrc)
237 {
238 #ifdef HAVE_FTELLO64
239 	off64_t offset = ftello64(packedfile);
240 #else
241 	off_t offset = ftello(packedfile);
242 #endif
243 	char *memblock;
244 	int rc;
245 
246 	if (offset < 0)
247 		return -errno;
248 
249 	inode->fragmentoff = (erofs_off_t)offset;
250 	inode->fragment_size = inode->i_size;
251 
252 	memblock = mmap(NULL, inode->i_size, PROT_READ, MAP_SHARED, fd, 0);
253 	if (memblock == MAP_FAILED || !memblock) {
254 		unsigned long long remaining = inode->fragment_size;
255 
256 		memblock = NULL;
257 		while (remaining) {
258 			char buf[32768];
259 			unsigned int sz = min_t(unsigned int, remaining,
260 						sizeof(buf));
261 
262 			rc = read(fd, buf, sz);
263 			if (rc != sz) {
264 				if (rc < 0)
265 					rc = -errno;
266 				else
267 					rc = -EAGAIN;
268 				goto out;
269 			}
270 			if (fwrite(buf, sz, 1, packedfile) != 1) {
271 				rc = -EIO;
272 				goto out;
273 			}
274 			remaining -= sz;
275 		}
276 		rc = lseek(fd, 0, SEEK_SET);
277 		if (rc < 0) {
278 			rc = -errno;
279 			goto out;
280 		}
281 	} else if (fwrite(memblock, inode->fragment_size, 1, packedfile) != 1) {
282 		rc = -EIO;
283 		goto out;
284 	}
285 
286 	erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
287 		  inode->fragmentoff);
288 
289 	if (memblock)
290 		rc = z_erofs_fragments_dedupe_insert(memblock,
291 			inode->fragment_size, inode->fragmentoff, tofcrc);
292 	else
293 		rc = 0;
294 out:
295 	if (memblock)
296 		munmap(memblock, inode->i_size);
297 	return rc;
298 }
299 
z_erofs_pack_fragments(struct erofs_inode * inode,void * data,unsigned int len,u32 tofcrc)300 int z_erofs_pack_fragments(struct erofs_inode *inode, void *data,
301 			   unsigned int len, u32 tofcrc)
302 {
303 #ifdef HAVE_FTELLO64
304 	off64_t offset = ftello64(packedfile);
305 #else
306 	off_t offset = ftello(packedfile);
307 #endif
308 	int ret;
309 
310 	if (offset < 0)
311 		return -errno;
312 
313 	inode->fragmentoff = (erofs_off_t)offset;
314 	inode->fragment_size = len;
315 
316 	if (fwrite(data, len, 1, packedfile) != 1)
317 		return -EIO;
318 
319 	erofs_dbg("Recording %u fragment data at %lu", inode->fragment_size,
320 		  inode->fragmentoff);
321 
322 	ret = z_erofs_fragments_dedupe_insert(data, len, inode->fragmentoff,
323 					      tofcrc);
324 	if (ret)
325 		return ret;
326 	return len;
327 }
328 
erofs_flush_packed_inode(struct erofs_sb_info * sbi)329 int erofs_flush_packed_inode(struct erofs_sb_info *sbi)
330 {
331 	struct erofs_inode *inode;
332 
333 	if (!erofs_sb_has_fragments(sbi))
334 		return -EINVAL;
335 	fflush(packedfile);
336 	if (!ftello(packedfile))
337 		return 0;
338 
339 	inode = erofs_mkfs_build_special_from_fd(sbi, fileno(packedfile),
340 						 EROFS_PACKED_INODE);
341 	sbi->packed_nid = erofs_lookupnid(inode);
342 	erofs_iput(inode);
343 	return 0;
344 }
345 
erofs_packedfile_exit(void)346 void erofs_packedfile_exit(void)
347 {
348 	if (packedfile)
349 		fclose(packedfile);
350 }
351 
erofs_packedfile_init(void)352 FILE *erofs_packedfile_init(void)
353 {
354 #ifdef HAVE_TMPFILE64
355 	packedfile = tmpfile64();
356 #else
357 	packedfile = tmpfile();
358 #endif
359 	if (!packedfile)
360 		return ERR_PTR(-ENOMEM);
361 	return packedfile;
362 }
363