xref: /aosp_15_r20/external/erofs-utils/lib/tar.c (revision 33b1fccf6a0fada2c2875d400ed01119b7676ee5)
1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 #include <unistd.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include <sys/stat.h>
6 #if defined(HAVE_ZLIB)
7 #include <zlib.h>
8 #endif
9 #include "erofs/print.h"
10 #include "erofs/cache.h"
11 #include "erofs/diskbuf.h"
12 #include "erofs/inode.h"
13 #include "erofs/list.h"
14 #include "erofs/tar.h"
15 #include "erofs/xattr.h"
16 #include "erofs/blobchunk.h"
17 #include "erofs/rebuild.h"
18 
/* This file is a tape/volume header.  Ignore it on extraction.  */
#define GNUTYPE_VOLHDR 'V'

/*
 * Layout of one 512-byte tar header block as it is read from the stream.
 *
 * The numeric fields used by this file (mode, uid, gid, size, mtime, chksum,
 * devmajor, devminor) are text fields decoded with tarerofs_otoi() or
 * tarerofs_parsenum(); they are not guaranteed to be NUL-terminated.
 * The trailing comment on each member gives its byte range within the block.
 */
struct tar_header {
	char name[100];		/*   0-99 */
	char mode[8];		/* 100-107 */
	char uid[8];		/* 108-115 */
	char gid[8];		/* 116-123 */
	char size[12];		/* 124-135 */
	char mtime[12];		/* 136-147 */
	char chksum[8];		/* 148-155 */
	char typeflag;		/* 156-156 */
	char linkname[100];	/* 157-256 */
	char magic[6];		/* 257-262 */
	char version[2];	/* 263-264 */
	char uname[32];		/* 265-296 */
	char gname[32];		/* 297-328 */
	char devmajor[8];	/* 329-336 */
	char devminor[8];	/* 337-344 */
	char prefix[155];	/* 345-499 */
	char padding[12];	/* 500-511 (pad to exactly 512 bytes) */
};
41 
erofs_iostream_close(struct erofs_iostream * ios)42 void erofs_iostream_close(struct erofs_iostream *ios)
43 {
44 	free(ios->buffer);
45 	if (ios->decoder == EROFS_IOS_DECODER_GZIP) {
46 #if defined(HAVE_ZLIB)
47 		gzclose(ios->handler);
48 #endif
49 		return;
50 	} else if (ios->decoder == EROFS_IOS_DECODER_LIBLZMA) {
51 #if defined(HAVE_LIBLZMA)
52 		lzma_end(&ios->lzma->strm);
53 		close(ios->lzma->fd);
54 		free(ios->lzma);
55 #endif
56 		return;
57 	}
58 	close(ios->vf.fd);
59 }
60 
erofs_iostream_open(struct erofs_iostream * ios,int fd,int decoder)61 int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder)
62 {
63 	s64 fsz;
64 
65 	ios->feof = false;
66 	ios->tail = ios->head = 0;
67 	ios->decoder = decoder;
68 	ios->dumpfd = -1;
69 	if (decoder == EROFS_IOS_DECODER_GZIP) {
70 #if defined(HAVE_ZLIB)
71 		ios->handler = gzdopen(fd, "r");
72 		if (!ios->handler)
73 			return -ENOMEM;
74 		ios->sz = fsz = 0;
75 		ios->bufsize = 32768;
76 #else
77 		return -EOPNOTSUPP;
78 #endif
79 	} else if (decoder == EROFS_IOS_DECODER_LIBLZMA) {
80 #ifdef HAVE_LIBLZMA
81 		lzma_ret ret;
82 
83 		ios->lzma = malloc(sizeof(*ios->lzma));
84 		if (!ios->lzma)
85 			return -ENOMEM;
86 		ios->lzma->fd = fd;
87 		ios->lzma->strm = (lzma_stream)LZMA_STREAM_INIT;
88 		ret = lzma_auto_decoder(&ios->lzma->strm,
89 					UINT64_MAX, LZMA_CONCATENATED);
90 		if (ret != LZMA_OK)
91 			return -EFAULT;
92 		ios->sz = fsz = 0;
93 		ios->bufsize = 32768;
94 #else
95 		return -EOPNOTSUPP;
96 #endif
97 	} else {
98 		ios->vf.fd = fd;
99 		fsz = lseek(fd, 0, SEEK_END);
100 		if (fsz <= 0) {
101 			ios->feof = !fsz;
102 			ios->sz = 0;
103 		} else {
104 			ios->sz = fsz;
105 			if (lseek(fd, 0, SEEK_SET))
106 				return -EIO;
107 #ifdef HAVE_POSIX_FADVISE
108 			if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL))
109 				erofs_warn("failed to fadvise: %s, ignored.",
110 					   erofs_strerror(-errno));
111 #endif
112 		}
113 		ios->bufsize = 16384;
114 	}
115 
116 	do {
117 		ios->buffer = malloc(ios->bufsize);
118 		if (ios->buffer)
119 			break;
120 		ios->bufsize >>= 1;
121 	} while (ios->bufsize >= 1024);
122 
123 	if (!ios->buffer)
124 		return -ENOMEM;
125 	return 0;
126 }
127 
/*
 * Expose up to @bytes of stream data through the internal buffer.
 *
 * @ios:   stream to read from
 * @buf:   receives a pointer INTO the stream's internal buffer; the data
 *         is only valid until the next call that touches @ios
 * @bytes: number of bytes the caller would like to consume
 *
 * If enough data is already buffered it is handed out directly.  Otherwise
 * the unread remainder is moved to the front of the buffer and one refill
 * is attempted from the underlying source (gzip, liblzma or plain file).
 *
 * Returns the number of bytes made available at *@buf (possibly fewer than
 * @bytes, and 0 at end of stream) or a negative errno.
 */
int erofs_iostream_read(struct erofs_iostream *ios, void **buf, u64 bytes)
{
	unsigned int rabytes = ios->tail - ios->head;
	int ret;

	if (rabytes >= bytes) {
		*buf = ios->buffer + ios->head;
		ios->head += bytes;
		return bytes;
	}

	if (ios->head) {
		memmove(ios->buffer, ios->buffer + ios->head, rabytes);
		ios->head = 0;
		ios->tail = rabytes;
	}

	if (!ios->feof) {
		if (ios->decoder == EROFS_IOS_DECODER_GZIP) {
#if defined(HAVE_ZLIB)
			ret = gzread(ios->handler, ios->buffer + rabytes,
				     ios->bufsize - rabytes);
			/*
			 * gzread() reports errors as -1; letting that through
			 * would shrink ios->tail and feed a negative length to
			 * the dump write() below.
			 */
			if (ret <= 0) {
				int errnum;
				const char *errstr;

				errstr = gzerror(ios->handler, &errnum);
				if (ret < 0 || errnum != Z_STREAM_END) {
					erofs_err("failed to gzread: %s", errstr);
					return -EIO;
				}
				ios->feof = true;
			}
			ios->tail += ret;
#else
			return -EOPNOTSUPP;
#endif
		} else if (ios->decoder == EROFS_IOS_DECODER_LIBLZMA) {
#ifdef HAVE_LIBLZMA
			struct erofs_iostream_liblzma *lzma = ios->lzma;
			lzma_action action = LZMA_RUN;
			lzma_ret ret2;

			/* refill the compressed input only once it is drained */
			if (!lzma->strm.avail_in) {
				lzma->strm.next_in = lzma->inbuf;
				ret = read(lzma->fd, lzma->inbuf,
					   sizeof(lzma->inbuf));
				if (ret < 0)
					return -errno;
				lzma->strm.avail_in = ret;
				/* a short read is taken as the end of the input */
				if (ret < sizeof(lzma->inbuf))
					action = LZMA_FINISH;
			}
			lzma->strm.next_out = (u8 *)ios->buffer + rabytes;
			lzma->strm.avail_out = ios->bufsize - rabytes;

			ret2 = lzma_code(&lzma->strm, action);
			if (ret2 != LZMA_OK) {
				if (ret2 == LZMA_STREAM_END)
					ios->feof = true;
				else
					return -EIO;
			}
			ret = ios->bufsize - rabytes - lzma->strm.avail_out;
			ios->tail += ret;
#else
			return -EOPNOTSUPP;
#endif
		} else {
			ret = erofs_io_read(&ios->vf, ios->buffer + rabytes,
					    ios->bufsize - rabytes);
			if (ret < 0)
				return ret;
			ios->tail += ret;
			if (ret < ios->bufsize - rabytes)
				ios->feof = true;
		}
		/* optionally mirror the freshly read bytes to the dump fd */
		if (__erofs_unlikely(ios->dumpfd >= 0))
			if (write(ios->dumpfd, ios->buffer + rabytes, ret) < ret)
				erofs_err("failed to dump %d bytes of the raw stream: %s",
					  ret, erofs_strerror(-errno));
	}
	*buf = ios->buffer;
	ret = min_t(int, ios->tail, min_t(u64, bytes, INT_MAX));
	ios->head = ret;
	return ret;
}
215 
/*
 * Copy up to @bytes of stream data into the caller-provided buffer @buf.
 *
 * Unlike erofs_iostream_read(), which hands out at most one internal
 * buffer's worth per call, this loops until @bytes have been copied or the
 * stream runs dry.
 *
 * Returns the number of bytes actually copied (short only at end of
 * stream) or a negative errno from the underlying read.
 */
int erofs_iostream_bread(struct erofs_iostream *ios, void *buf, u64 bytes)
{
	u8 *dst = buf;
	u64 rem = bytes;
	void *src;
	int ret;

	do {
		ret = erofs_iostream_read(ios, &src, rem);
		if (ret < 0)
			return ret;
		memcpy(dst, src, ret);
		/* advance, or the next chunk overwrites the previous one */
		dst += ret;
		rem -= ret;
	} while (rem && ret);

	return bytes - rem;
}
232 
/*
 * Skip @sz bytes of the stream.
 *
 * Buffered data is consumed first.  For a plain stream of known size (and
 * with no dump fd attached) the rest is skipped with a seek; otherwise the
 * data is read and discarded.
 *
 * Returns 0 when everything was skipped, a positive count of bytes that
 * could not be skipped because the stream ended (clamped to INT_MAX so a
 * large remainder can never wrap to 0 or a negative value), or a negative
 * errno on I/O failure.
 */
int erofs_iostream_lskip(struct erofs_iostream *ios, u64 sz)
{
	unsigned int rabytes = ios->tail - ios->head;
	int ret;
	void *dummy;

	if (rabytes >= sz) {
		ios->head += sz;
		return 0;
	}

	sz -= rabytes;
	ios->head = ios->tail = 0;
	if (ios->feof)
		return sz > INT_MAX ? INT_MAX : (int)sz;

	if (ios->sz && __erofs_likely(ios->dumpfd < 0)) {
		s64 cur = erofs_io_lseek(&ios->vf, sz, SEEK_CUR);

		/* a failed seek must not be reported as a successful skip */
		if (cur < 0)
			return -EIO;
		if (cur > ios->sz) {
			s64 over = cur - ios->sz;

			return over > INT_MAX ? INT_MAX : (int)over;
		}
		return 0;
	}

	do {
		ret = erofs_iostream_read(ios, &dummy, sz);
		if (ret < 0)
			return ret;
		sz -= ret;
	} while (!(ios->feof || !ret || !sz));

	return sz > INT_MAX ? INT_MAX : (int)sz;
}
266 
/*
 * Decode an ASCII octal tar header field.
 *
 * @ptr: start of the field; it need NOT be NUL-terminated
 * @len: width of the field in bytes (must be smaller than 32)
 *
 * The field is copied into a local NUL-terminated buffer before parsing so
 * the conversion can never run into the neighbouring header field.
 *
 * Returns the decoded value.  errno is 0 on success; it is set to EINVAL
 * when the field holds no digits, is followed by anything other than a NUL
 * or a space, or when @len is out of range (strtoll() may also set ERANGE).
 */
static long long tarerofs_otoi(const char *ptr, int len)
{
	char inp[32];
	char *endp = inp;
	long long val;

	if (len < 0 || (size_t)len >= sizeof(inp)) {
		errno = EINVAL;
		return 0;
	}
	memcpy(inp, ptr, len);
	inp[len] = '\0';

	errno = 0;
	/* parse the bounded copy, not the raw (unterminated) field */
	val = strtoll(inp, &endp, 8);
	if ((!val && endp == inp) ||
	    (*endp && *endp != ' '))
		errno = EINVAL;
	return val;
}
283 
/*
 * Decode a numeric tar header field that is either ASCII octal or the GNU
 * base-256 extension.
 *
 * @ptr: start of the field
 * @len: width of the field in bytes
 *
 * Returns the decoded value.  errno is 0 on success and non-zero on a
 * malformed or out-of-range field (callers test errno, not the result).
 */
static long long tarerofs_parsenum(const char *ptr, int len)
{
	/*
	 * For fields containing numbers or timestamps that are out of range
	 * for the basic format, the GNU format uses a base-256 representation
	 * instead of an ASCII octal number.
	 */
	if (*ptr == '\200') {
		unsigned long long res = 0;

		/* callers check errno afterwards, so start from a clean state */
		errno = 0;
		while (--len) {
			/* another byte would not fit into a long long anymore */
			if (res >> 55) {
				errno = ERANGE;
				return LLONG_MAX;
			}
			res = (res << 8) + (unsigned char)*(++ptr);
		}
		return res;
	}
	return tarerofs_otoi(ptr, len);
}
300 
/*
 * One extended attribute collected from a pax header.
 *
 * @kv holds @len bytes laid out as "name=value" (not NUL-terminated);
 * @namelen is the length of the name part, so the value starts at
 * kv + namelen + 1 and is len - namelen - 1 bytes long.
 */
struct tarerofs_xattr_item {
	struct list_head list;	/* link in the owning xattr list */
	char *kv;		/* heap-allocated "name=value" bytes */
	unsigned int len, namelen;
};
306 
tarerofs_insert_xattr(struct list_head * xattrs,char * kv,int namelen,int len,bool skip)307 int tarerofs_insert_xattr(struct list_head *xattrs,
308 			  char *kv, int namelen, int len, bool skip)
309 {
310 	struct tarerofs_xattr_item *item;
311 	char *nv;
312 
313 	DBG_BUGON(namelen >= len);
314 	list_for_each_entry(item, xattrs, list) {
315 		if (!strncmp(item->kv, kv, namelen + 1)) {
316 			if (skip)
317 				return 0;
318 			goto found;
319 		}
320 	}
321 
322 	item = malloc(sizeof(*item));
323 	if (!item)
324 		return -ENOMEM;
325 	item->kv = NULL;
326 	item->namelen = namelen;
327 	namelen = 0;
328 	list_add_tail(&item->list, xattrs);
329 found:
330 	nv = realloc(item->kv, len);
331 	if (!nv)
332 		return -ENOMEM;
333 	item->kv = nv;
334 	item->len = len;
335 	memcpy(nv + namelen, kv + namelen, len - namelen);
336 	return 0;
337 }
338 
tarerofs_merge_xattrs(struct list_head * dst,struct list_head * src)339 int tarerofs_merge_xattrs(struct list_head *dst, struct list_head *src)
340 {
341 	struct tarerofs_xattr_item *item;
342 
343 	list_for_each_entry(item, src, list) {
344 		int ret;
345 
346 		ret = tarerofs_insert_xattr(dst, item->kv, item->namelen,
347 					    item->len, true);
348 		if (ret)
349 			return ret;
350 	}
351 	return 0;
352 }
353 
tarerofs_remove_xattrs(struct list_head * xattrs)354 void tarerofs_remove_xattrs(struct list_head *xattrs)
355 {
356 	struct tarerofs_xattr_item *item, *n;
357 
358 	list_for_each_entry_safe(item, n, xattrs, list) {
359 		DBG_BUGON(!item->kv);
360 		free(item->kv);
361 		list_del(&item->list);
362 		free(item);
363 	}
364 }
365 
tarerofs_apply_xattrs(struct erofs_inode * inode,struct list_head * xattrs)366 int tarerofs_apply_xattrs(struct erofs_inode *inode, struct list_head *xattrs)
367 {
368 	struct tarerofs_xattr_item *item;
369 	int ret;
370 
371 	list_for_each_entry(item, xattrs, list) {
372 		const char *v = item->kv + item->namelen + 1;
373 		unsigned int vsz = item->len - item->namelen - 1;
374 
375 		if (item->len <= item->namelen - 1) {
376 			DBG_BUGON(item->len < item->namelen - 1);
377 			continue;
378 		}
379 		item->kv[item->namelen] = '\0';
380 		erofs_dbg("Recording xattr(%s)=\"%s\" (of %u bytes) to file %s",
381 			  item->kv, v, vsz, inode->i_srcpath);
382 		ret = erofs_setxattr(inode, item->kv, v, vsz);
383 		if (ret == -ENODATA)
384 			erofs_err("Failed to set xattr(%s)=%s to file %s",
385 				  item->kv, v, inode->i_srcpath);
386 		else if (ret)
387 			return ret;
388 	}
389 	return 0;
390 }
391 
/* the standard base64 alphabet (RFC 4648); the index is the 6-bit value */
static const char lookup_table[65] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Decode @len characters of standard (MSB-first) base64 from @src to @dst.
 *
 * Up to two trailing '=' padding characters are ignored and unpadded input
 * is accepted as well.  @dst may alias @src: the output position never
 * overtakes the input position.  @dst must have room for 3 bytes per 4
 * input characters.
 *
 * Returns the number of decoded bytes, -2 if a character outside the
 * alphabet is found, or -1 if the input ends with a lone symbol that
 * cannot encode a whole byte.
 */
static int base64_decode(const char *src, int len, unsigned char *dst)
{
	unsigned int acc = 0;
	unsigned char *out = dst;
	int i, bits = 0;

	/* Check for and ignore any end padding */
	for (i = 0; i < 2 && len > 0 && src[len - 1] == '='; i++)
		--len;

	for (i = 0; i < len; i++) {
		/* strchr() would also match the table's terminating NUL */
		const char *p = src[i] ? strchr(lookup_table, src[i]) : NULL;

		if (!p)
			return -2;
		acc = (acc << 6) | (unsigned int)(p - lookup_table);
		bits += 6;
		if (bits >= 8) {
			bits -= 8;
			*out++ = (acc >> bits) & 0xff;
		}
		/* keep only the bits that have not been emitted yet */
		acc &= (1U << bits) - 1;
	}
	/* 6 pending bits means a dangling symbol: truncated input */
	if (bits >= 6)
		return -1;
	return out - dst;
}
425 
/*
 * Read a pax extended header of @size bytes from @ios and merge the
 * recognised records into @eh.
 *
 * Handled keywords: path, linkpath, mtime, size, uid, gid, SCHILY.xattr.*
 * and LIBARCHIVE.xattr.* (base64-encoded value).  Anything else is logged
 * and ignored.
 *
 * Returns 0 on success, -EIO on a truncated or malformed header,
 * -EFSCORRUPTED on an undecodable LIBARCHIVE.xattr value, or -ENOMEM.
 */
int tarerofs_parse_pax_header(struct erofs_iostream *ios,
			      struct erofs_pax_header *eh, u32 size)
{
	char *buf, *p;
	int ret;

	/* erofs_iostream_bread() reports counts as int anyway */
	if (size >= INT_MAX)
		return -EIO;

	/* one spare byte so that parsing can never run off the buffer */
	buf = malloc((size_t)size + 1);
	if (!buf)
		return -ENOMEM;
	p = buf;

	ret = erofs_iostream_bread(ios, buf, size);
	if (ret != size) {
		/* a short read must not be mistaken for a success code */
		if (ret >= 0)
			ret = -EIO;
		goto out;
	}
	buf[size] = '\0';

	while (p < buf + size) {
		char *kv, *value;
		int len, n;
		/* extended records are of the format: "LEN NAME=VALUE\n" */
		ret = sscanf(p, "%d %n", &len, &n);
		if (ret < 1 || len <= n || len > buf + size - p) {
			ret = -EIO;
			goto out;
		}
		kv = p + n;
		p += len;
		len -= n;

		if (p[-1] != '\n') {
			ret = -EIO;
			goto out;
		}
		/* turn the record into a C string */
		p[-1] = '\0';

		value = memchr(kv, '=', p - kv);
		if (!value) {
			ret = -EIO;
			goto out;
		} else {
			long long lln;

			value++;

			if (!strncmp(kv, "path=", sizeof("path=") - 1)) {
				char *path = strdup(value);
				size_t j;

				if (!path) {
					ret = -ENOMEM;
					goto out;
				}
				free(eh->path);
				eh->path = path;
				/* strip trailing slashes without underrunning */
				j = strlen(path);
				while (j && path[j - 1] == '/')
					path[--j] = '\0';
			} else if (!strncmp(kv, "linkpath=",
					sizeof("linkpath=") - 1)) {
				char *link = strdup(value);

				if (!link) {
					ret = -ENOMEM;
					goto out;
				}
				free(eh->link);
				eh->link = link;
			} else if (!strncmp(kv, "mtime=",
					sizeof("mtime=") - 1)) {
				ret = sscanf(value, "%lld %n", &lln, &n);
				if(ret < 1) {
					ret = -EIO;
					goto out;
				}
				eh->st.st_mtime = lln;
				if (value[n] == '.') {
					const char *frac = value + n + 1;
					int nsec = 0, digits = 0;

					if (*frac < '0' || *frac > '9') {
						ret = -EIO;
						goto out;
					}
					/*
					 * The digits are a decimal fraction of
					 * a second: use at most nine of them
					 * and scale shorter ones (".5" is
					 * 500000000ns, not 5ns).
					 */
					for (; *frac >= '0' && *frac <= '9'; ++frac) {
						if (digits < 9) {
							nsec = nsec * 10 + (*frac - '0');
							++digits;
						}
					}
					for (; digits < 9; ++digits)
						nsec *= 10;
					ST_MTIM_NSEC_SET(&eh->st, nsec);
				} else {
					ST_MTIM_NSEC_SET(&eh->st, 0);
				}
				eh->use_mtime = true;
			} else if (!strncmp(kv, "size=",
					sizeof("size=") - 1)) {
				ret = sscanf(value, "%lld %n", &lln, &n);
				if(ret < 1 || value[n] != '\0') {
					ret = -EIO;
					goto out;
				}
				eh->st.st_size = lln;
				eh->use_size = true;
			} else if (!strncmp(kv, "uid=", sizeof("uid=") - 1)) {
				ret = sscanf(value, "%lld %n", &lln, &n);
				if(ret < 1 || value[n] != '\0') {
					ret = -EIO;
					goto out;
				}
				eh->st.st_uid = lln;
				eh->use_uid = true;
			} else if (!strncmp(kv, "gid=", sizeof("gid=") - 1)) {
				ret = sscanf(value, "%lld %n", &lln, &n);
				if(ret < 1 || value[n] != '\0') {
					ret = -EIO;
					goto out;
				}
				eh->st.st_gid = lln;
				eh->use_gid = true;
			} else if (!strncmp(kv, "SCHILY.xattr.",
				   sizeof("SCHILY.xattr.") - 1)) {
				char *key = kv + sizeof("SCHILY.xattr.") - 1;

				--len; /* p[-1] == '\0' */
				ret = tarerofs_insert_xattr(&eh->xattrs, key,
						value - key - 1,
						len - (key - kv), false);
				if (ret)
					goto out;
			} else if (!strncmp(kv, "LIBARCHIVE.xattr.",
				   sizeof("LIBARCHIVE.xattr.") - 1)) {
				char *key;
				key = kv + sizeof("LIBARCHIVE.xattr.") - 1;

				--len; /* p[-1] == '\0' */
				/* decode in place: the output is never longer */
				ret = base64_decode(value, len - (value - kv),
						    (u8 *)value);
				if (ret < 0) {
					ret = -EFSCORRUPTED;
					goto out;
				}

				ret = tarerofs_insert_xattr(&eh->xattrs, key,
						value - key - 1,
						value - key + ret, false);
				if (ret)
					goto out;
			} else {
				erofs_info("unrecognized pax keyword \"%s\", ignoring", kv);
			}
		}
	}
	ret = 0;
out:
	free(buf);
	return ret;
}
561 
tarerofs_remove_inode(struct erofs_inode * inode)562 void tarerofs_remove_inode(struct erofs_inode *inode)
563 {
564 	struct erofs_dentry *d;
565 
566 	--inode->i_nlink;
567 	if (!S_ISDIR(inode->i_mode))
568 		return;
569 
570 	/* remove all subdirss */
571 	list_for_each_entry(d, &inode->i_subdirs, d_child) {
572 		if (!is_dot_dotdot(d->name))
573 			tarerofs_remove_inode(d->inode);
574 		erofs_iput(d->inode);
575 		d->inode = NULL;
576 	}
577 	--inode->i_parent->i_nlink;
578 }
579 
tarerofs_write_file_data(struct erofs_inode * inode,struct erofs_tarfile * tar)580 static int tarerofs_write_file_data(struct erofs_inode *inode,
581 				    struct erofs_tarfile *tar)
582 {
583 	void *buf;
584 	int fd, nread;
585 	u64 off, j;
586 
587 	if (!inode->i_diskbuf) {
588 		inode->i_diskbuf = calloc(1, sizeof(*inode->i_diskbuf));
589 		if (!inode->i_diskbuf)
590 			return -ENOSPC;
591 	} else {
592 		erofs_diskbuf_close(inode->i_diskbuf);
593 	}
594 
595 	fd = erofs_diskbuf_reserve(inode->i_diskbuf, 0, &off);
596 	if (fd < 0)
597 		return -EBADF;
598 
599 	for (j = inode->i_size; j; ) {
600 		nread = erofs_iostream_read(&tar->ios, &buf, j);
601 		if (nread < 0)
602 			break;
603 		if (write(fd, buf, nread) != nread) {
604 			nread = -EIO;
605 			break;
606 		}
607 		j -= nread;
608 	}
609 	erofs_diskbuf_commit(inode->i_diskbuf, inode->i_size);
610 	inode->datasource = EROFS_INODE_DATA_SOURCE_DISKBUF;
611 	return 0;
612 }
613 
tarerofs_parse_tar(struct erofs_inode * root,struct erofs_tarfile * tar)614 int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar)
615 {
616 	char path[PATH_MAX];
617 	struct erofs_pax_header eh = tar->global;
618 	struct erofs_sb_info *sbi = root->sbi;
619 	bool whout, opq, e = false;
620 	struct stat st;
621 	erofs_off_t tar_offset, dataoff;
622 
623 	struct tar_header *th;
624 	struct erofs_dentry *d;
625 	struct erofs_inode *inode;
626 	unsigned int j, csum, cksum;
627 	int ckksum, ret, rem;
628 
629 	if (eh.path)
630 		eh.path = strdup(eh.path);
631 	if (eh.link)
632 		eh.link = strdup(eh.link);
633 	init_list_head(&eh.xattrs);
634 
635 restart:
636 	rem = tar->offset & 511;
637 	if (rem) {
638 		if (erofs_iostream_lskip(&tar->ios, 512 - rem)) {
639 			ret = -EIO;
640 			goto out;
641 		}
642 		tar->offset += 512 - rem;
643 	}
644 
645 	tar_offset = tar->offset;
646 	ret = erofs_iostream_read(&tar->ios, (void **)&th, sizeof(*th));
647 	if (ret != sizeof(*th)) {
648 		if (tar->headeronly_mode || tar->ddtaridx_mode) {
649 			ret = 1;
650 			goto out;
651 		}
652 		erofs_err("failed to read header block @ %llu", tar_offset);
653 		ret = -EIO;
654 		goto out;
655 	}
656 	tar->offset += sizeof(*th);
657 	if (*th->name == '\0') {
658 		if (e) {	/* end of tar 2 empty blocks */
659 			ret = 1;
660 			goto out;
661 		}
662 		e = true;	/* empty jump to next block */
663 		goto restart;
664 	}
665 
666 	/* chksum field itself treated as ' ' */
667 	csum = tarerofs_otoi(th->chksum, sizeof(th->chksum));
668 	if (errno) {
669 		erofs_err("invalid chksum @ %llu", tar_offset);
670 		ret = -EBADMSG;
671 		goto out;
672 	}
673 	cksum = 0;
674 	for (j = 0; j < 8; ++j)
675 		cksum += (unsigned int)' ';
676 	ckksum = cksum;
677 	for (j = 0; j < 148; ++j) {
678 		cksum += (unsigned int)((u8*)th)[j];
679 		ckksum += (int)((char*)th)[j];
680 	}
681 	for (j = 156; j < 500; ++j) {
682 		cksum += (unsigned int)((u8*)th)[j];
683 		ckksum += (int)((char*)th)[j];
684 	}
685 	if (!tar->ddtaridx_mode && csum != cksum && csum != ckksum) {
686 		erofs_err("chksum mismatch @ %llu", tar_offset);
687 		ret = -EBADMSG;
688 		goto out;
689 	}
690 
691 	if (th->typeflag == GNUTYPE_VOLHDR) {
692 		if (th->size[0])
693 			erofs_warn("GNUTYPE_VOLHDR with non-zeroed size @ %llu",
694 				   tar_offset);
695 		/* anyway, strncpy could cause some GCC warning here */
696 		memcpy(sbi->volume_name, th->name, sizeof(sbi->volume_name));
697 		goto restart;
698 	}
699 
700 	if (memcmp(th->magic, "ustar", 5)) {
701 		erofs_err("invalid tar magic @ %llu", tar_offset);
702 		ret = -EIO;
703 		goto out;
704 	}
705 
706 	st.st_mode = tarerofs_otoi(th->mode, sizeof(th->mode));
707 	if (errno)
708 		goto invalid_tar;
709 
710 	if (eh.use_uid) {
711 		st.st_uid = eh.st.st_uid;
712 	} else {
713 		st.st_uid = tarerofs_parsenum(th->uid, sizeof(th->uid));
714 		if (errno)
715 			goto invalid_tar;
716 	}
717 
718 	if (eh.use_gid) {
719 		st.st_gid = eh.st.st_gid;
720 	} else {
721 		st.st_gid = tarerofs_parsenum(th->gid, sizeof(th->gid));
722 		if (errno)
723 			goto invalid_tar;
724 	}
725 
726 	if (eh.use_size) {
727 		st.st_size = eh.st.st_size;
728 	} else {
729 		st.st_size = tarerofs_parsenum(th->size, sizeof(th->size));
730 		if (errno)
731 			goto invalid_tar;
732 	}
733 
734 	if (eh.use_mtime) {
735 		st.st_mtime = eh.st.st_mtime;
736 		ST_MTIM_NSEC_SET(&st, ST_MTIM_NSEC(&eh.st));
737 	} else {
738 		st.st_mtime = tarerofs_parsenum(th->mtime, sizeof(th->mtime));
739 		if (errno)
740 			goto invalid_tar;
741 		ST_MTIM_NSEC_SET(&st, 0);
742 	}
743 
744 	if (th->typeflag <= '7' && !eh.path) {
745 		eh.path = path;
746 		j = 0;
747 		if (*th->prefix) {
748 			memcpy(path, th->prefix, sizeof(th->prefix));
749 			path[sizeof(th->prefix)] = '\0';
750 			j = strlen(path);
751 			if (path[j - 1] != '/') {
752 				path[j] = '/';
753 				path[++j] = '\0';
754 			}
755 		}
756 		memcpy(path + j, th->name, sizeof(th->name));
757 		path[j + sizeof(th->name)] = '\0';
758 		j = strlen(path);
759 		while (path[j - 1] == '/')
760 			path[--j] = '\0';
761 	}
762 
763 	dataoff = tar->offset;
764 	if (!(tar->headeronly_mode || tar->ddtaridx_mode))
765 		tar->offset += st.st_size;
766 	switch(th->typeflag) {
767 	case '0':
768 	case '7':
769 	case '1':
770 		st.st_mode |= S_IFREG;
771 		break;
772 	case '2':
773 		st.st_mode |= S_IFLNK;
774 		break;
775 	case '3':
776 		st.st_mode |= S_IFCHR;
777 		break;
778 	case '4':
779 		st.st_mode |= S_IFBLK;
780 		break;
781 	case '5':
782 		st.st_mode |= S_IFDIR;
783 		break;
784 	case '6':
785 		st.st_mode |= S_IFIFO;
786 		break;
787 	case 'g':
788 		ret = tarerofs_parse_pax_header(&tar->ios, &tar->global,
789 						st.st_size);
790 		if (ret)
791 			goto out;
792 		if (tar->global.path) {
793 			free(eh.path);
794 			eh.path = strdup(tar->global.path);
795 		}
796 		if (tar->global.link) {
797 			free(eh.link);
798 			eh.link = strdup(tar->global.link);
799 		}
800 		goto restart;
801 	case 'x':
802 		ret = tarerofs_parse_pax_header(&tar->ios, &eh, st.st_size);
803 		if (ret)
804 			goto out;
805 		goto restart;
806 	case 'L':
807 		free(eh.path);
808 		eh.path = malloc(st.st_size + 1);
809 		if (st.st_size != erofs_iostream_bread(&tar->ios, eh.path,
810 						       st.st_size))
811 			goto invalid_tar;
812 		eh.path[st.st_size] = '\0';
813 		goto restart;
814 	case 'K':
815 		free(eh.link);
816 		eh.link = malloc(st.st_size + 1);
817 		if (st.st_size > PATH_MAX || st.st_size !=
818 		    erofs_iostream_bread(&tar->ios, eh.link, st.st_size))
819 			goto invalid_tar;
820 		eh.link[st.st_size] = '\0';
821 		goto restart;
822 	default:
823 		erofs_info("unrecognized typeflag %xh @ %llu - ignoring",
824 			   th->typeflag, tar_offset);
825 		(void)erofs_iostream_lskip(&tar->ios, st.st_size);
826 		ret = 0;
827 		goto out;
828 	}
829 
830 	st.st_rdev = 0;
831 	if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) {
832 		int major, minor;
833 
834 		major = tarerofs_parsenum(th->devmajor, sizeof(th->devmajor));
835 		if (errno) {
836 			erofs_err("invalid device major @ %llu", tar_offset);
837 			goto out;
838 		}
839 
840 		minor = tarerofs_parsenum(th->devminor, sizeof(th->devminor));
841 		if (errno) {
842 			erofs_err("invalid device minor @ %llu", tar_offset);
843 			goto out;
844 		}
845 
846 		st.st_rdev = (major << 8) | (minor & 0xff) | ((minor & ~0xff) << 12);
847 	} else if (th->typeflag == '1' || th->typeflag == '2') {
848 		if (!eh.link)
849 			eh.link = strndup(th->linkname, sizeof(th->linkname));
850 	}
851 
852 	/* EROFS metadata index referring to the original tar data */
853 	if (tar->index_mode && sbi->extra_devices &&
854 	    erofs_blkoff(sbi, dataoff)) {
855 		erofs_err("invalid tar data alignment @ %llu", tar_offset);
856 		ret = -EIO;
857 		goto out;
858 	}
859 
860 	erofs_dbg("parsing %s (mode %05o)", eh.path, st.st_mode);
861 
862 	d = erofs_rebuild_get_dentry(root, eh.path, tar->aufs, &whout, &opq, true);
863 	if (IS_ERR(d)) {
864 		ret = PTR_ERR(d);
865 		goto out;
866 	}
867 
868 	if (!d) {
869 		/* some tarballs include '.' which indicates the root directory */
870 		if (!S_ISDIR(st.st_mode)) {
871 			ret = -ENOTDIR;
872 			goto out;
873 		}
874 		inode = root;
875 	} else if (opq) {
876 		DBG_BUGON(d->type == EROFS_FT_UNKNOWN);
877 		DBG_BUGON(!d->inode);
878 		/*
879 		 * needed if the tar tree is used soon, thus we have no chance
880 		 * to generate it from xattrs.  No impact to mergefs.
881 		 */
882 		d->inode->opaque = true;
883 		ret = erofs_set_opaque_xattr(d->inode);
884 		goto out;
885 	} else if (th->typeflag == '1') {	/* hard link cases */
886 		struct erofs_dentry *d2;
887 		bool dumb;
888 
889 		if (S_ISDIR(st.st_mode)) {
890 			ret = -EISDIR;
891 			goto out;
892 		}
893 
894 		if (d->type != EROFS_FT_UNKNOWN) {
895 			tarerofs_remove_inode(d->inode);
896 			erofs_iput(d->inode);
897 		}
898 		d->inode = NULL;
899 
900 		d2 = erofs_rebuild_get_dentry(root, eh.link, tar->aufs,
901 					      &dumb, &dumb, false);
902 		if (IS_ERR(d2)) {
903 			ret = PTR_ERR(d2);
904 			goto out;
905 		}
906 		if (d2->type == EROFS_FT_UNKNOWN) {
907 			ret = -ENOENT;
908 			goto out;
909 		}
910 		if (S_ISDIR(d2->inode->i_mode)) {
911 			ret = -EISDIR;
912 			goto out;
913 		}
914 		inode = erofs_igrab(d2->inode);
915 		d->inode = inode;
916 		d->type = d2->type;
917 		++inode->i_nlink;
918 		ret = 0;
919 		goto out;
920 	} else if (d->type != EROFS_FT_UNKNOWN) {
921 		if (d->type != EROFS_FT_DIR || !S_ISDIR(st.st_mode)) {
922 			struct erofs_inode *parent = d->inode->i_parent;
923 
924 			tarerofs_remove_inode(d->inode);
925 			erofs_iput(d->inode);
926 			d->inode = parent;
927 			goto new_inode;
928 		}
929 		inode = d->inode;
930 	} else {
931 new_inode:
932 		inode = erofs_new_inode(sbi);
933 		if (IS_ERR(inode)) {
934 			ret = PTR_ERR(inode);
935 			goto out;
936 		}
937 		inode->dev = tar->dev;
938 		inode->i_parent = d->inode;
939 		d->inode = inode;
940 		d->type = erofs_mode_to_ftype(st.st_mode);
941 	}
942 
943 	if (whout) {
944 		inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFCHR;
945 		inode->u.i_rdev = EROFS_WHITEOUT_DEV;
946 		d->type = EROFS_FT_CHRDEV;
947 
948 		/*
949 		 * Mark the parent directory as copied-up to avoid exposing
950 		 * whiteouts if mounted.  See kernel commit b79e05aaa166
951 		 * ("ovl: no direct iteration for dir with origin xattr")
952 		 */
953 		inode->i_parent->whiteouts = true;
954 	} else {
955 		inode->i_mode = st.st_mode;
956 		if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode))
957 			inode->u.i_rdev = erofs_new_encode_dev(st.st_rdev);
958 	}
959 
960 	inode->i_srcpath = strdup(eh.path);
961 	if (!inode->i_srcpath) {
962 		ret = -ENOMEM;
963 		goto out;
964 	}
965 
966 	ret = __erofs_fill_inode(inode, &st, eh.path);
967 	if (ret)
968 		goto out;
969 	inode->i_size = st.st_size;
970 
971 	if (!S_ISDIR(inode->i_mode)) {
972 		if (S_ISLNK(inode->i_mode)) {
973 			inode->i_size = strlen(eh.link);
974 			inode->i_link = malloc(inode->i_size + 1);
975 			memcpy(inode->i_link, eh.link, inode->i_size + 1);
976 		} else if (inode->i_size) {
977 			if (tar->headeronly_mode) {
978 				ret = erofs_write_zero_inode(inode);
979 			} else if (tar->ddtaridx_mode) {
980 				dataoff = le64_to_cpu(*(__le64 *)(th->devmajor));
981 				if (tar->rvsp_mode) {
982 					inode->datasource = EROFS_INODE_DATA_SOURCE_RESVSP;
983 					inode->i_ino[1] = dataoff;
984 					ret = 0;
985 				} else {
986 					ret = tarerofs_write_chunkes(inode, dataoff);
987 				}
988 			} else if (tar->rvsp_mode) {
989 				inode->datasource = EROFS_INODE_DATA_SOURCE_RESVSP;
990 				inode->i_ino[1] = dataoff;
991 				if (erofs_iostream_lskip(&tar->ios, inode->i_size))
992 					ret = -EIO;
993 				else
994 					ret = 0;
995 			} else if (tar->index_mode) {
996 				ret = tarerofs_write_chunkes(inode, dataoff);
997 				if (!ret && erofs_iostream_lskip(&tar->ios,
998 								 inode->i_size))
999 					ret = -EIO;
1000 			} else {
1001 				ret = tarerofs_write_file_data(inode, tar);
1002 			}
1003 			if (ret)
1004 				goto out;
1005 		}
1006 		inode->i_nlink++;
1007 	} else if (!inode->i_nlink) {
1008 		ret = erofs_init_empty_dir(inode);
1009 		if (ret)
1010 			goto out;
1011 	}
1012 
1013 	ret = tarerofs_merge_xattrs(&eh.xattrs, &tar->global.xattrs);
1014 	if (ret)
1015 		goto out;
1016 
1017 	ret = tarerofs_apply_xattrs(inode, &eh.xattrs);
1018 
1019 out:
1020 	if (eh.path != path)
1021 		free(eh.path);
1022 	free(eh.link);
1023 	tarerofs_remove_xattrs(&eh.xattrs);
1024 	return ret;
1025 
1026 invalid_tar:
1027 	erofs_err("invalid tar @ %llu", tar_offset);
1028 	ret = -EIO;
1029 	goto out;
1030 }
1031