1 // Archive/ZipIn.cpp
2
3 #include "StdAfx.h"
4
5 // #include <stdio.h>
6
7 #include "../../../Common/DynamicBuffer.h"
8 #include "../../../Common/IntToString.h"
9 #include "../../../Common/MyException.h"
10 #include "../../../Common/StringToInt.h"
11
12 #include "../../../Windows/PropVariant.h"
13
14 #include "../IArchive.h"
15
16 #include "ZipIn.h"
17
// Short aliases for the GetUi16 / GetUi32 / GetUi64 readers (defined outside this file).
#define Get16(p) GetUi16(p)
#define Get32(p) GetUi32(p)
#define Get64(p) GetUi64(p)

// G16 / G32 / G64 assign to (v) the value read at byte offset (offs) from a
// pointer that must be named (p) in the scope where the macro is used.
#define G16(offs, v) v = Get16(p + (offs))
#define G32(offs, v) v = Get32(p + (offs))
#define G64(offs, v) v = Get64(p + (offs))
25
26 namespace NArchive {
27 namespace NZip {
28
/* We try to use the same Buffer size (1 << 17) for all tasks.
   It allows us to avoid reallocations and cache clearing. */

static const size_t kSeqBufferSize = (size_t)1 << 17;
33
34 /*
35 Open()
36 {
37 _inBufMode = false;
38 ReadVols()
39 FindCd();
40 TryEcd64()
41 SeekToVol()
42 FindMarker()
43 _inBufMode = true;
44 ReadHeaders()
45 _inBufMode = false;
46 ReadCd()
47 FindCd()
48 TryEcd64()
49 TryReadCd()
50 {
51 SeekToVol();
52 _inBufMode = true;
53 }
54 _inBufMode = true;
55 ReadLocals()
56 ReadCdItem()
57 ....
58 }
59 FindCd() writes to Buffer without touching (_inBufMode)
60 */
61
62 /*
63 if (not defined ZIP_SELF_CHECK) : it reads CD and if error in first pass CD reading, it reads LOCALS-CD-MODE
64 if ( defined ZIP_SELF_CHECK) : it always reads CD and LOCALS-CD-MODE
65 use ZIP_SELF_CHECK to check LOCALS-CD-MODE for any zip archive
66 */
67
68 // #define ZIP_SELF_CHECK
69
70
71 struct CEcd
72 {
73 UInt16 ThisDisk;
74 UInt16 CdDisk;
75 UInt16 NumEntries_in_ThisDisk;
76 UInt16 NumEntries;
77 UInt32 Size;
78 UInt32 Offset;
79 UInt16 CommentSize;
80
IsEmptyArcNArchive::NZip::CEcd81 bool IsEmptyArc() const
82 {
83 return ThisDisk == 0
84 && CdDisk == 0
85 && NumEntries_in_ThisDisk == 0
86 && NumEntries == 0
87 && Size == 0
88 && Offset == 0 // test it
89 ;
90 }
91
92 void Parse(const Byte *p); // (p) doesn't include signature
93 };
94
Parse(const Byte * p)95 void CEcd::Parse(const Byte *p)
96 {
97 // (p) doesn't include signature
98 G16(0, ThisDisk);
99 G16(2, CdDisk);
100 G16(4, NumEntries_in_ThisDisk);
101 G16(6, NumEntries);
102 G32(8, Size);
103 G32(12, Offset);
104 G16(16, CommentSize);
105 }
106
107
ParseEcd32(const Byte * p)108 void CCdInfo::ParseEcd32(const Byte *p)
109 {
110 IsFromEcd64 = false;
111 // (p) includes signature
112 p += 4;
113 G16(0, ThisDisk);
114 G16(2, CdDisk);
115 G16(4, NumEntries_in_ThisDisk);
116 G16(6, NumEntries);
117 G32(8, Size);
118 G32(12, Offset);
119 G16(16, CommentSize);
120 }
121
ParseEcd64e(const Byte * p)122 void CCdInfo::ParseEcd64e(const Byte *p)
123 {
124 IsFromEcd64 = true;
125 // (p) exclude signature
126 G16(0, VersionMade);
127 G16(2, VersionNeedExtract);
128 G32(4, ThisDisk);
129 G32(8, CdDisk);
130
131 G64(12, NumEntries_in_ThisDisk);
132 G64(20, NumEntries);
133 G64(28, Size);
134 G64(36, Offset);
135 }
136
137
138 struct CLocator
139 {
140 UInt32 Ecd64Disk;
141 UInt32 NumDisks;
142 UInt64 Ecd64Offset;
143
CLocatorNArchive::NZip::CLocator144 CLocator(): Ecd64Disk(0), NumDisks(0), Ecd64Offset(0) {}
145
ParseNArchive::NZip::CLocator146 void Parse(const Byte *p)
147 {
148 G32(0, Ecd64Disk);
149 G64(4, Ecd64Offset);
150 G32(12, NumDisks);
151 }
152
IsEmptyArcNArchive::NZip::CLocator153 bool IsEmptyArc() const
154 {
155 return Ecd64Disk == 0 && NumDisks == 0 && Ecd64Offset == 0;
156 }
157 };
158
159
160
161
ClearRefs()162 void CInArchive::ClearRefs()
163 {
164 StreamRef.Release();
165 Stream = NULL;
166 StartStream = NULL;
167 Callback = NULL;
168
169 Vols.Clear();
170 }
171
Close()172 void CInArchive::Close()
173 {
174 _cnt = 0;
175 DisableBufMode();
176
177 IsArcOpen = false;
178
179 IsArc = false;
180 IsZip64 = false;
181
182 IsApk = false;
183 IsCdUnsorted = false;
184
185 HeadersError = false;
186 HeadersWarning = false;
187 ExtraMinorError = false;
188
189 UnexpectedEnd = false;
190 LocalsWereRead = false;
191 LocalsCenterMerged = false;
192 NoCentralDir = false;
193 Overflow32bit = false;
194 Cd_NumEntries_Overflow_16bit = false;
195
196 MarkerIsFound = false;
197 MarkerIsSafe = false;
198
199 IsMultiVol = false;
200 UseDisk_in_SingleVol = false;
201 EcdVolIndex = 0;
202
203 ArcInfo.Clear();
204
205 ClearRefs();
206 }
207
208
209
Seek_SavePos(UInt64 offset)210 HRESULT CInArchive::Seek_SavePos(UInt64 offset)
211 {
212 // InitBuf();
213 // if (!Stream) return S_FALSE;
214 return Stream->Seek((Int64)offset, STREAM_SEEK_SET, &_streamPos);
215 }
216
217
/* SeekToVol() keeps the cached data, if the new volIndex is the same volume
   as Vols.StreamIndex and offset doesn't go out of the cached region */
220
SeekToVol(int volIndex,UInt64 offset)221 HRESULT CInArchive::SeekToVol(int volIndex, UInt64 offset)
222 {
223 if (volIndex != Vols.StreamIndex)
224 {
225 if (IsMultiVol && volIndex >= 0)
226 {
227 if ((unsigned)volIndex >= Vols.Streams.Size())
228 return S_FALSE;
229 if (!Vols.Streams[(unsigned)volIndex].Stream)
230 return S_FALSE;
231 Stream = Vols.Streams[(unsigned)volIndex].Stream;
232 }
233 else if (volIndex == -2)
234 {
235 if (!Vols.ZipStream)
236 return S_FALSE;
237 Stream = Vols.ZipStream;
238 }
239 else
240 Stream = StartStream;
241 Vols.StreamIndex = volIndex;
242 }
243 else
244 {
245 if (offset <= _streamPos)
246 {
247 const UInt64 back = _streamPos - offset;
248 if (back <= _bufCached)
249 {
250 _bufPos = _bufCached - (size_t)back;
251 return S_OK;
252 }
253 }
254 }
255 InitBuf();
256 return Seek_SavePos(offset);
257 }
258
259
AllocateBuffer(size_t size)260 HRESULT CInArchive::AllocateBuffer(size_t size)
261 {
262 if (size <= Buffer.Size())
263 return S_OK;
264 /* in cached mode virtual_pos is not equal to phy_pos (_streamPos)
265 so we change _streamPos and do Seek() to virtual_pos before cache clearing */
266 if (_bufPos != _bufCached)
267 {
268 RINOK(Seek_SavePos(GetVirtStreamPos()))
269 }
270 InitBuf();
271 Buffer.AllocAtLeast(size);
272 if (!Buffer.IsAllocated())
273 return E_OUTOFMEMORY;
274 return S_OK;
275 }
276
277 // ---------- ReadFromCache ----------
278 // reads from cache and from Stream
279 // move to next volume can be allowed if (CanStartNewVol) and only before first byte reading
280
// Copies up to (size) bytes to (data): first from the cached part of Buffer,
// then from Stream.
//   data      : destination
//   size      : requested number of bytes
//   processed : receives the number of bytes that were copied to (data)
// Returns S_OK also if less than (size) bytes were available (check (processed)),
// or the error code of Stream->Read() / ReadStream() / SeekToStart().
// Side effects: advances _bufPos / _streamPos / _cnt, clears CanStartNewVol
// after any byte was read, and can switch Stream to the next volume.
HRESULT CInArchive::ReadFromCache(Byte *data, unsigned size, unsigned &processed)
{
  HRESULT result = S_OK;
  processed = 0;

  for (;;)
  {
    if (size == 0)
      return S_OK;

    const size_t avail = GetAvail();

    if (avail != 0)
    {
      // serve the request from the cached bytes first
      unsigned cur = size;
      if (cur > avail)
        cur = (unsigned)avail;
      memcpy(data, (const Byte *)Buffer + _bufPos, cur);

      data += cur;
      size -= cur;
      processed += cur;

      _bufPos += cur;
      _cnt += cur;

      CanStartNewVol = false;

      continue;
    }

    // cache is empty here
    InitBuf();

    if (_inBufMode)
    {
      // buffered mode: refill the whole Buffer, the copy to (data) is done
      // in the next iteration of the loop
      UInt32 cur = 0;
      result = Stream->Read(Buffer, (UInt32)Buffer.Size(), &cur);
      _bufPos = 0;
      _bufCached = cur;
      _streamPos += cur;
      if (cur != 0)
        CanStartNewVol = false;
      if (result != S_OK)
        break;
      if (cur != 0)
        continue;
    }
    else
    {
      // direct mode: read straight to (data); the function returns after
      // this read, if any byte was read
      size_t cur = size;
      result = ReadStream(Stream, data, &cur);
      data += cur;
      size -= (unsigned)cur;
      processed += (unsigned)cur;
      _streamPos += cur;
      _cnt += cur;
      if (cur != 0)
      {
        CanStartNewVol = false;
        break;
      }
      if (result != S_OK)
        break;
    }

    // no bytes were read from current stream.
    // moving to the next volume is allowed only before the first byte was read
    if (   !IsMultiVol
        || !CanStartNewVol
        || Vols.StreamIndex < 0
        || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size())
      break;

    const CVols::CSubStreamInfo &s = Vols.Streams[(unsigned)Vols.StreamIndex + 1];
    if (!s.Stream)
      break;
    result = s.SeekToStart();
    if (result != S_OK)
      break;
    Vols.StreamIndex++;
    _streamPos = 0;
    // Vols.NeedSeek = false;

    Stream = s.Stream;
  }

  return result;
}
367
368
ReadFromCache_FALSE(Byte * data,unsigned size)369 HRESULT CInArchive::ReadFromCache_FALSE(Byte *data, unsigned size)
370 {
371 unsigned processed;
372 HRESULT res = ReadFromCache(data, size, processed);
373 if (res == S_OK && size != processed)
374 return S_FALSE;
375 return res;
376 }
377
378
CheckDosTime(UInt32 dosTime)379 static bool CheckDosTime(UInt32 dosTime)
380 {
381 if (dosTime == 0)
382 return true;
383 unsigned month = (dosTime >> 21) & 0xF;
384 unsigned day = (dosTime >> 16) & 0x1F;
385 unsigned hour = (dosTime >> 11) & 0x1F;
386 unsigned min = (dosTime >> 5) & 0x3F;
387 unsigned sec = (dosTime & 0x1F) * 2;
388 if (month < 1 || month > 12 || day < 1 || day > 31 || hour > 23 || min > 59 || sec > 59)
389 return false;
390 return true;
391 }
392
// Checks whether the bytes at (p) look like the start of zip data.
//   p    : candidate data; p[0] must be 'P'
//   size : number of available bytes at (p)
// Returns:
//   k_IsArc_Res_NO        : data is not accepted
//   k_IsArc_Res_NEED_MORE : (size) is too small to decide
//   k_IsArc_Res_YES       : data is accepted
// An optional kNoSpan / kSpan signature is skipped first. Then one of three
// signatures is expected: kEcd64 or kEcd (accepted only for an empty archive),
// or kLocalFileHeader (name bytes and extra sub-blocks are checked).
API_FUNC_IsArc IsArc_Zip(const Byte *p, size_t size)
{
  if (size < 8)
    return k_IsArc_Res_NEED_MORE;
  if (p[0] != 'P')
    return k_IsArc_Res_NO;

  UInt32 sig = Get32(p);

  // skip the span marker, if it is before the main signature
  if (sig == NSignature::kNoSpan || sig == NSignature::kSpan)
  {
    p += 4;
    size -= 4;
  }

  sig = Get32(p);

  if (sig == NSignature::kEcd64)
  {
    if (size < kEcd64_FullSize)
      return k_IsArc_Res_NEED_MORE;

    const UInt64 recordSize = Get64(p + 4);
    if (   recordSize < kEcd64_MainSize
        || recordSize > kEcd64_MainSize + (1 << 20))
      return k_IsArc_Res_NO;
    CCdInfo cdInfo;
    cdInfo.ParseEcd64e(p + 12);
    if (!cdInfo.IsEmptyArc())
      return k_IsArc_Res_NO;
    return k_IsArc_Res_YES; // k_IsArc_Res_YES_2;
  }

  if (sig == NSignature::kEcd)
  {
    if (size < kEcdSize)
      return k_IsArc_Res_NEED_MORE;
    CEcd ecd;
    ecd.Parse(p + 4);
    // if (ecd.cdSize != 0)
    if (!ecd.IsEmptyArc())
      return k_IsArc_Res_NO;
    return k_IsArc_Res_YES; // k_IsArc_Res_YES_2;
  }

  if (sig != NSignature::kLocalFileHeader)
    return k_IsArc_Res_NO;

  if (size < kLocalHeaderSize)
    return k_IsArc_Res_NEED_MORE;

  // (p) points to the header fields after signature up to (p -= 4) below
  p += 4;

  {
    // header that contains only zeros can't be accepted yet
    const unsigned kPureHeaderSize = kLocalHeaderSize - 4;
    unsigned i;
    for (i = 0; i < kPureHeaderSize && p[i] == 0; i++);
    if (i == kPureHeaderSize)
      return k_IsArc_Res_NEED_MORE;
  }

  /*
  if (p[0] >= 128) // ExtractVersion.Version;
    return k_IsArc_Res_NO;
  */

  // ExtractVersion.Version = p[0];
  // ExtractVersion.HostOS = p[1];
  // Flags = Get16(p + 2);
  // Method = Get16(p + 4);
  /*
  // 9.33: some zip archives contain incorrect value in timestamp. So we don't check it now
  UInt32 dosTime = Get32(p + 6);
  if (!CheckDosTime(dosTime))
    return k_IsArc_Res_NO;
  */
  // Crc = Get32(p + 10);
  // PackSize = Get32(p + 14);
  // Size = Get32(p + 18);
  const unsigned nameSize = Get16(p + 22);
  unsigned extraSize = Get16(p + 24);
  const UInt32 extraOffset = kLocalHeaderSize + (UInt32)nameSize;

  /*
  // 21.02: fixed. we don't use the following check
  if (extraOffset + extraSize > (1 << 16))
    return k_IsArc_Res_NO;
  */

  // (p) points to the signature again
  p -= 4;

  {
    // check the available part of the name:
    // after the first zero byte only zero bytes are allowed
    size_t rem = size - kLocalHeaderSize;
    if (rem > nameSize)
      rem = nameSize;
    const Byte *p2 = p + kLocalHeaderSize;
    for (size_t i = 0; i < rem; i++)
      if (p2[i] == 0)
      {
        // we support some "bad" zip archives that contain zeros after name
        for (size_t k = i + 1; k < rem; k++)
          if (p2[k] != 0)
            return k_IsArc_Res_NO;
        break;
        /*
        if (i != nameSize - 1)
          return k_IsArc_Res_NO;
        */
      }
  }

  if (size < extraOffset)
    return k_IsArc_Res_NEED_MORE;

  if (extraSize > 0)
  {
    // walk over extra sub-blocks: 4-byte sub-header (size is at offset 2), then data.
    // (size) and (extraSize) are decreased together with moving of (p)
    p += extraOffset;
    size -= extraOffset;
    while (extraSize != 0)
    {
      if (extraSize < 4)
      {
        // 7-Zip before 9.31 created incorrect WzAES Extra in folder's local headers.
        // so we return k_IsArc_Res_YES to support such archives.
        // return k_IsArc_Res_NO; // do we need to support such extra ?
        return k_IsArc_Res_YES;
      }
      if (size < 4)
        return k_IsArc_Res_NEED_MORE;
      unsigned dataSize = Get16(p + 2);
      size -= 4;
      extraSize -= 4;
      p += 4;
      if (dataSize > extraSize)
      {
        // It can be error on header.
        // We want to support such rare case bad archives.
        // We use additional checks to reduce false-positive probability.
        if (nameSize == 0
            || nameSize > (1 << 9)
            || extraSize > (1 << 9))
          return k_IsArc_Res_NO;
        return k_IsArc_Res_YES;
      }
      if (dataSize > size)
        return k_IsArc_Res_NEED_MORE;
      size -= dataSize;
      extraSize -= dataSize;
      p += dataSize;
    }
  }

  return k_IsArc_Res_YES;
}
547
IsArc_Zip_2(const Byte * p,size_t size,bool isFinal)548 static UInt32 IsArc_Zip_2(const Byte *p, size_t size, bool isFinal)
549 {
550 UInt32 res = IsArc_Zip(p, size);
551 if (res == k_IsArc_Res_NEED_MORE && isFinal)
552 return k_IsArc_Res_NO;
553 return res;
554 }
555
556
557
558 /* FindPK_4() is allowed to access data up to and including &limit[3].
559 limit[4] access is not allowed.
560 return:
561 (return_ptr < limit) : "PK" was found at (return_ptr)
562 (return_ptr >= limit) : limit was reached or crossed. So no "PK" found before limit
563 */
// Scans forward from (p) for the byte pair 0x50 0x4B ("PK").
// Each pass of the inner loop starts with (p < limit) and looks at p[1]..p[4],
// that is why bytes up to limit[3] can be read.
// Returns pointer to 0x50 of the found pair (the pointer can be >= limit,
// callers treat that as "not found"), or (limit), if (p >= limit) was reached.
Z7_NO_INLINE
static const Byte *FindPK_4(const Byte *p, const Byte *limit)
{
  for (;;)
  {
    for (;;)
    {
      if (p >= limit)
        return limit;
      // p[1] is tested as second byte of pair (p[0], p[1]) and as first byte of pair (p[1], p[2])
      Byte b = p[1];
      if (b == 0x4B) { if (p[0] == 0x50) { return p;     } p += 1; break; }
      if (b == 0x50) { if (p[2] == 0x4B) { return p + 1; } p += 2; break; }
      // the same two tests for old p[3]; (p) is already advanced by 4 here
      b = p[3];
      p += 4;
      if (b == 0x4B) { if (p[-2]== 0x50) { return p - 2; } p -= 1; break; }
      if (b == 0x50) { if (p[0] == 0x4B) { return p - 1; }         break; }
    }
  }
  /*
  for (;;)
  {
    for (;;)
    {
      if (p >= limit)
        return limit;
      if (*p++ == 0x50) break;
      if (*p++ == 0x50) break;
      if (*p++ == 0x50) break;
      if (*p++ == 0x50) break;
    }
    if (*p == 0x4B)
      return p - 1;
  }
  */
}
599
600
601 /*
602 ---------- FindMarker ----------
603 returns:
604 S_OK:
605 ArcInfo.MarkerVolIndex : volume of marker
606 ArcInfo.MarkerPos : Pos of first signature
607 ArcInfo.MarkerPos2 : Pos of main signature (local item signature in most cases)
608 _streamPos : stream pos
    _cnt                   : The number of virtual bytes from the start of search to the offset after signature
610 _signature : main signature
611
612 S_FALSE: can't find marker, or there is some non-zip data after marker
613
614 Error code: stream reading error.
615 */
616
// Searches for the first zip signature starting from the current virtual position.
//   searchLimit : NULL - no limit;
//                 points to 0 - only the current position is tested (weak test of signature value);
//                 otherwise - limit for the search distance (compared with _cnt).
// On S_OK: ArcInfo.MarkerVolIndex / MarkerPos / MarkerPos2, ArcInfo.IsSpanMode (for span markers)
// and _signature are set, and the read position is after the main signature.
// Returns S_FALSE, if marker was not found, or error code of stream / callback operations.
// In scan mode the function switches _inBufMode to true and doesn't restore it.
HRESULT CInArchive::FindMarker(const UInt64 *searchLimit)
{
  ArcInfo.MarkerPos = GetVirtStreamPos();
  ArcInfo.MarkerPos2 = ArcInfo.MarkerPos;
  ArcInfo.MarkerVolIndex = Vols.StreamIndex;

  _cnt = 0;

  CanStartNewVol = false;

  if (searchLimit && *searchLimit == 0)
  {
    // no search: signature must be at the current position
    Byte startBuf[kMarkerSize];
    RINOK(ReadFromCache_FALSE(startBuf, kMarkerSize))

    UInt32 marker = Get32(startBuf);
    _signature = marker;

    if (   marker == NSignature::kNoSpan
        || marker == NSignature::kSpan)
    {
      // span marker is followed by the main signature
      RINOK(ReadFromCache_FALSE(startBuf, kMarkerSize))
      _signature = Get32(startBuf);
    }

    if (   _signature != NSignature::kEcd
        && _signature != NSignature::kEcd64
        && _signature != NSignature::kLocalFileHeader)
      return S_FALSE;

    ArcInfo.MarkerPos2 = GetVirtStreamPos() - 4;
    ArcInfo.IsSpanMode = (marker == NSignature::kSpan);

    // we use weak test in case of (*searchLimit == 0)
    // since error will be detected later in Open function
    return S_OK;
  }

  // zip specification: (_zip_header_size < (1 << 16))
  // so we need such size to check header
  const size_t kCheckSize = (size_t)1 << 16;
  const size_t kBufSize   = (size_t)1 << 17; // (kBufSize must be > kCheckSize)

  RINOK(AllocateBuffer(kBufSize))

  _inBufMode = true;

  UInt64 progressPrev = 0;

  for (;;)
  {
    RINOK(LookAhead(kBufSize))

    const size_t avail = GetAvail();

    // candidates are tested only at offsets [0, limitPos) from the current position
    size_t limitPos;
    // (avail > kBufSize) is possible, if (Buffer.Size() > kBufSize)
    const bool isFinished = (avail < kBufSize);
    if (isFinished)
    {
      // LookAhead() could not provide kBufSize bytes: it's the tail of current stream
      const unsigned kMinAllowed = 4;
      if (avail <= kMinAllowed)
      {
        // tail is too small for the check: try to continue in the next volume
        if (   !IsMultiVol
            || Vols.StreamIndex < 0
            || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size())
          break;

        SkipLookahed(avail);

        const CVols::CSubStreamInfo &s = Vols.Streams[(unsigned)Vols.StreamIndex + 1];
        if (!s.Stream)
          break;

        RINOK(s.SeekToStart())

        InitBuf();
        Vols.StreamIndex++;
        _streamPos = 0;
        Stream = s.Stream;
        continue;
      }
      limitPos = avail - kMinAllowed;
    }
    else
      // every tested candidate has more than kCheckSize bytes available after it
      limitPos = (avail - kCheckSize);

    // we don't check at (limitPos) for good fast aligned operations

    if (searchLimit)
    {
      if (_cnt > *searchLimit)
        break;
      UInt64 rem = *searchLimit - _cnt;
      if (limitPos > rem)
        limitPos = (size_t)rem + 1;
    }

    if (limitPos == 0)
      break;

    const Byte * const pStart = Buffer + _bufPos;
    const Byte * p = pStart;
    const Byte * const limit = pStart + limitPos;

    for (;; p++)
    {
      p = FindPK_4(p, limit);
      if (p >= limit)
        break;
      // number of available bytes starting from candidate
      size_t rem = (size_t)(pStart + avail - p);
      /* 22.02 : we limit check size with kCheckSize to be consistent for
         any different combination of _bufPos in Buffer and size of Buffer. */
      if (rem > kCheckSize)
        rem = kCheckSize;
      const UInt32 res = IsArc_Zip_2(p, rem, isFinished);
      if (res != k_IsArc_Res_NO)
      {
        if (rem < kMarkerSize)
          return S_FALSE;
        _signature = Get32(p);
        // move the read position to the candidate, then over the signature
        SkipLookahed((size_t)(p - pStart));
        ArcInfo.MarkerVolIndex = Vols.StreamIndex;
        ArcInfo.MarkerPos = GetVirtStreamPos();
        ArcInfo.MarkerPos2 = ArcInfo.MarkerPos;
        SkipLookahed(4);
        if (   _signature == NSignature::kNoSpan
            || _signature == NSignature::kSpan)
        {
          // main signature is after the span marker
          if (rem < kMarkerSize * 2)
            return S_FALSE;
          ArcInfo.IsSpanMode = (_signature == NSignature::kSpan);
          _signature = Get32(p + 4);
          ArcInfo.MarkerPos2 += 4;
          SkipLookahed(4);
        }
        return S_OK;
      }
    }

    if (!IsMultiVol && isFinished)
      break;

    // nothing was found in checked region: consume it
    SkipLookahed((size_t)(p - pStart));

    // progress is reported after each (1 << 23) processed bytes
    if (Callback && (_cnt - progressPrev) >= ((UInt32)1 << 23))
    {
      progressPrev = _cnt;
      // const UInt64 numFiles64 = 0;
      RINOK(Callback->SetCompleted(NULL, &_cnt))
    }
  }

  return S_FALSE;
}
772
773
774 /*
775 ---------- IncreaseRealPosition ----------
776 moves virtual offset in virtual stream.
777 changing to new volumes is allowed
778 */
779
IncreaseRealPosition(UInt64 offset,bool & isFinished)780 HRESULT CInArchive::IncreaseRealPosition(UInt64 offset, bool &isFinished)
781 {
782 isFinished = false;
783
784 for (;;)
785 {
786 const size_t avail = GetAvail();
787
788 if (offset <= avail)
789 {
790 _bufPos += (size_t)offset;
791 _cnt += offset;
792 return S_OK;
793 }
794
795 _cnt += avail;
796 offset -= avail;
797
798 _bufCached = 0;
799 _bufPos = 0;
800
801 if (!_inBufMode)
802 break;
803
804 CanStartNewVol = true;
805 LookAhead(1);
806
807 if (GetAvail() == 0)
808 return S_OK;
809 }
810
811 // cache is empty
812
813 if (!IsMultiVol)
814 {
815 _cnt += offset;
816 return Stream->Seek((Int64)offset, STREAM_SEEK_CUR, &_streamPos);
817 }
818
819 for (;;)
820 {
821 if (offset == 0)
822 return S_OK;
823
824 if (Vols.StreamIndex < 0)
825 return S_FALSE;
826 if ((unsigned)Vols.StreamIndex >= Vols.Streams.Size())
827 {
828 isFinished = true;
829 return S_OK;
830 }
831 {
832 const CVols::CSubStreamInfo &s = Vols.Streams[(unsigned)Vols.StreamIndex];
833 if (!s.Stream)
834 {
835 isFinished = true;
836 return S_OK;
837 }
838 if (_streamPos > s.Size)
839 return S_FALSE;
840 const UInt64 rem = s.Size - _streamPos;
841 if ((UInt64)offset <= rem)
842 {
843 _cnt += offset;
844 return Stream->Seek((Int64)offset, STREAM_SEEK_CUR, &_streamPos);
845 }
846 RINOK(Seek_SavePos(s.Size))
847 offset -= rem;
848 _cnt += rem;
849 }
850
851 Stream = NULL;
852 _streamPos = 0;
853 Vols.StreamIndex++;
854 if ((unsigned)Vols.StreamIndex >= Vols.Streams.Size())
855 {
856 isFinished = true;
857 return S_OK;
858 }
859 const CVols::CSubStreamInfo &s2 = Vols.Streams[(unsigned)Vols.StreamIndex];
860 if (!s2.Stream)
861 {
862 isFinished = true;
863 return S_OK;
864 }
865 Stream = s2.Stream;
866 RINOK(Seek_SavePos(0))
867 }
868 }
869
870
871
872 /*
873 ---------- LookAhead ----------
874 Reads data to buffer, if required.
875
876 It can read from volumes as long as Buffer.Size().
877 But it moves to new volume, only if it's required to provide minRequired bytes in buffer.
878
879 in:
880 (minRequired <= Buffer.Size())
881
882 return:
883 S_OK : if (GetAvail() < minRequired) after function return, it's end of stream(s) data, or no new volume stream.
884 Error codes: IInStream::Read() error or IInStream::Seek() error for multivol
885 */
886
// Tries to make at least (minRequired) unread bytes available in Buffer.
// Unread bytes are moved to the start of Buffer, and the free tail of Buffer
// is filled from Stream. Next volume is opened only if current stream returned
// no data, and (IsMultiVol && CanStartNewVol).
// Returns S_OK also at the end of data (then GetAvail() < minRequired),
// or error code of Stream->Read() / SeekToStart().
HRESULT CInArchive::LookAhead(size_t minRequired)
{
  for (;;)
  {
    const size_t avail = GetAvail();

    if (minRequired <= avail)
      return S_OK;

    if (_bufPos != 0)
    {
      // compact: move unread bytes to the start of Buffer
      if (avail != 0)
        memmove(Buffer, Buffer + _bufPos, avail);
      _bufPos = 0;
      _bufCached = avail;
    }

    // read to the free part of Buffer after the cached bytes
    const size_t pos = _bufCached;
    UInt32 processed = 0;
    HRESULT res = Stream->Read(Buffer + pos, (UInt32)(Buffer.Size() - pos), &processed);
    // counters are updated before the check of (res)
    _streamPos += processed;
    _bufCached += processed;

    if (res != S_OK)
      return res;

    if (processed != 0)
      continue;

    // current stream returned no data: try the next volume, if it's allowed
    if (   !IsMultiVol
        || !CanStartNewVol
        || Vols.StreamIndex < 0
        || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size())
      return S_OK;

    const CVols::CSubStreamInfo &s = Vols.Streams[(unsigned)Vols.StreamIndex + 1];
    if (!s.Stream)
      return S_OK;

    RINOK(s.SeekToStart())

    Vols.StreamIndex++;
    _streamPos = 0;
    Stream = s.Stream;
    // Vols.NeedSeek = false;
  }
}
934
935
// Empty exception type. SafeRead() throws it, if less data than requested was read.
class CUnexpectEnd {};
937
938
939 /*
940 ---------- SafeRead ----------
941
942 reads data of exact size from stream(s)
943
944 in:
945 _inBufMode
946 if (CanStartNewVol) it can go to next volume before first byte reading, if there is end of volume data.
947
948 in, out:
949 _streamPos : position in Stream
950 Stream
951 Vols : if (IsMultiVol)
952 _cnt
953
954 out:
955 (CanStartNewVol == false), if some data was read
956
957 return:
958 S_OK : success reading of requested data
959
960 exceptions:
961 CSystemException() - stream reading error
962 CUnexpectEnd() : could not read data of requested size
963 */
964
SafeRead(Byte * data,unsigned size)965 void CInArchive::SafeRead(Byte *data, unsigned size)
966 {
967 unsigned processed;
968 HRESULT result = ReadFromCache(data, size, processed);
969 if (result != S_OK)
970 throw CSystemException(result);
971 if (size != processed)
972 throw CUnexpectEnd();
973 }
974
ReadBuffer(CByteBuffer & buffer,unsigned size)975 void CInArchive::ReadBuffer(CByteBuffer &buffer, unsigned size)
976 {
977 buffer.Alloc(size);
978 if (size != 0)
979 SafeRead(buffer, size);
980 }
981
982 // Byte CInArchive::ReadByte () { Byte b; SafeRead(&b, 1); return b; }
983 // UInt16 CInArchive::ReadUInt16() { Byte buf[2]; SafeRead(buf, 2); return Get16(buf); }
ReadUInt32()984 UInt32 CInArchive::ReadUInt32() { Byte buf[4]; SafeRead(buf, 4); return Get32(buf); }
ReadUInt64()985 UInt64 CInArchive::ReadUInt64() { Byte buf[8]; SafeRead(buf, 8); return Get64(buf); }
986
ReadSignature()987 void CInArchive::ReadSignature()
988 {
989 CanStartNewVol = true;
990 _signature = ReadUInt32();
991 // CanStartNewVol = false; // it's already changed in SafeRead
992 }
993
994
995 // we Skip() inside headers only, so no need for stream change in multivol.
996
Skip(size_t num)997 void CInArchive::Skip(size_t num)
998 {
999 while (num != 0)
1000 {
1001 const unsigned kBufSize = (size_t)1 << 10;
1002 Byte buf[kBufSize];
1003 unsigned step = kBufSize;
1004 if (step > num)
1005 step = (unsigned)num;
1006 SafeRead(buf, step);
1007 num -= step;
1008 }
1009 }
1010
1011 /*
1012 HRESULT CInArchive::Callback_Completed(unsigned numFiles)
1013 {
1014 const UInt64 numFiles64 = numFiles;
1015 return Callback->SetCompleted(&numFiles64, &_cnt);
1016 }
1017 */
1018
Skip64(UInt64 num,unsigned numFiles)1019 HRESULT CInArchive::Skip64(UInt64 num, unsigned numFiles)
1020 {
1021 if (num == 0)
1022 return S_OK;
1023
1024 for (;;)
1025 {
1026 size_t step = (size_t)1 << 24;
1027 if (step > num)
1028 step = (size_t)num;
1029 Skip(step);
1030 num -= step;
1031 if (num == 0)
1032 return S_OK;
1033 if (Callback)
1034 {
1035 const UInt64 numFiles64 = numFiles;
1036 RINOK(Callback->SetCompleted(&numFiles64, &_cnt))
1037 }
1038 }
1039 }
1040
1041
ReadFileName(unsigned size,AString & s)1042 bool CInArchive::ReadFileName(unsigned size, AString &s)
1043 {
1044 if (size == 0)
1045 {
1046 s.Empty();
1047 return true;
1048 }
1049 char *p = s.GetBuf(size);
1050 SafeRead((Byte *)p, size);
1051 unsigned i = size;
1052 do
1053 {
1054 if (p[i - 1] != 0)
1055 break;
1056 }
1057 while (--i);
1058 s.ReleaseBuf_CalcLen(size);
1059 return s.Len() == i;
1060 }
1061
1062
// Tests for the "all bits are set" 32-bit / 16-bit values.
// ReadExtra() reads a field from Zip64 extra only if the field value passes this test.
#define ZIP64_IS_32_MAX(n) ((n) == 0xFFFFFFFF)
#define ZIP64_IS_16_MAX(n) ((n) == 0xFFFF)
1065
1066
// Reads (extraSize) bytes of extra data from the stream and parses sub-blocks.
//   item       : item that owns the extra (item.Name is used for kIzUnicodeName check)
//   extraSize  : size of whole extra data
//   extra      : receives parsed sub-blocks and error flags
//   unpackSize, packSize : in: values from 32-bit header fields;
//                          out: replaced with 64-bit values from Zip64 extra, if required
//   cdItem     : NULL for local header; otherwise central directory item,
//                whose LocalHeaderPos / Disk can be updated from Zip64 extra
// Returns false, if size of some sub-block is larger than the rest of extra
// (the rest is skipped). Other problems only set warning / error flags.
// Reading functions can throw exceptions (see SafeRead()).
bool CInArchive::ReadExtra(const CLocalItem &item, unsigned extraSize, CExtraBlock &extra,
    UInt64 &unpackSize, UInt64 &packSize,
    CItem *cdItem)
{
  extra.Clear();

  while (extraSize >= 4)
  {
    // sub-block header: low 16 bits - ID, high 16 bits - data size
    CExtraSubBlock subBlock;
    const UInt32 pair = ReadUInt32();
    subBlock.ID = (pair & 0xFFFF);
    unsigned size = (unsigned)(pair >> 16);
    // const unsigned origSize = size;

    extraSize -= 4;

    if (size > extraSize)
    {
      // it's error in extra
      HeadersWarning = true;
      extra.Error = true;
      Skip(extraSize);
      return false;
    }

    extraSize -= size;

    if (subBlock.ID == NFileHeader::NExtraID::kZip64)
    {
      // Zip64 fields are consumed directly from the stream;
      // (size) is the number of sub-block bytes that are still unread
      extra.IsZip64 = true;
      bool isOK = true;

      if (!cdItem
          && size == 16
          && !ZIP64_IS_32_MAX(unpackSize)
          && !ZIP64_IS_32_MAX(packSize))
      {
        /* Win10 Explorer's "Send to Zip" for big (3500 MiB) files
           creates Zip64 Extra in local file header.
           But if both uncompressed and compressed sizes are smaller than 4 GiB,
           Win10 doesn't store 0xFFFFFFFF in 32-bit fields as expected by zip specification.
           21.04: we ignore these minor errors in Win10 zip archives. */
        if (ReadUInt64() != unpackSize)
          isOK = false;
        if (ReadUInt64() != packSize)
          isOK = false;
        size = 0;
      }
      else
      {
        // fields are stored in fixed order, and only for the values that are marked with max value
        if (ZIP64_IS_32_MAX(unpackSize))
          { if (size < 8) isOK = false; else { size -= 8; unpackSize = ReadUInt64(); }}

        if (isOK && ZIP64_IS_32_MAX(packSize))
          { if (size < 8) isOK = false; else { size -= 8; packSize = ReadUInt64(); }}

        if (cdItem)
        {
          if (isOK)
          {
            if (ZIP64_IS_32_MAX(cdItem->LocalHeaderPos))
              { if (size < 8) isOK = false; else { size -= 8; cdItem->LocalHeaderPos = ReadUInt64(); }}
            /*
            else if (size == 8)
            {
              size -= 8;
              const UInt64 v = ReadUInt64();
              // soong_zip, an AOSP tool (written in the Go) writes incorrect value.
              // we can ignore that minor error here
              if (v != cdItem->LocalHeaderPos)
                isOK = false; // ignore error
              // isOK = false; // force error
            }
            */
          }

          if (isOK && ZIP64_IS_16_MAX(cdItem->Disk))
            { if (size < 4) isOK = false; else { size -= 4; cdItem->Disk = ReadUInt32(); }}
        }
      }

      // we can ignore errors, when some zip archiver still write all fields to zip64 extra in local header
      // if (&& (cdItem || !isOK || origSize != 8 * 3 + 4 || size != 8 * 1 + 4))
      if (!isOK || size != 0)
      {
        HeadersWarning = true;
        extra.Error = true;
        extra.IsZip64_Error = true;
      }
      // skip unread part of Zip64 sub-block
      Skip(size);
    }
    else
    {
      // other sub-blocks are stored as raw data
      ReadBuffer(subBlock.Data, size);
      extra.SubBlocks.Add(subBlock);
      if (subBlock.ID == NFileHeader::NExtraID::kIzUnicodeName)
      {
        if (!subBlock.CheckIzUnicode(item.Name))
          extra.Error = true;
      }
    }
  }

  // 1..3 bytes can remain here: it's too small for sub-block header
  if (extraSize != 0)
  {
    ExtraMinorError = true;
    extra.MinorError = true;
    // 7-Zip before 9.31 created incorrect WzAES Extra in folder's local headers.
    // so we don't return false, but just set warning flag
    // return false;
    Skip(extraSize);
  }

  return true;
}
1182
1183
ReadLocalItem(CItemEx & item)1184 bool CInArchive::ReadLocalItem(CItemEx &item)
1185 {
1186 item.Disk = 0;
1187 if (IsMultiVol && Vols.StreamIndex >= 0)
1188 item.Disk = (UInt32)Vols.StreamIndex;
1189 const unsigned kPureHeaderSize = kLocalHeaderSize - 4;
1190 Byte p[kPureHeaderSize];
1191 SafeRead(p, kPureHeaderSize);
1192 {
1193 unsigned i;
1194 for (i = 0; i < kPureHeaderSize && p[i] == 0; i++);
1195 if (i == kPureHeaderSize)
1196 return false;
1197 }
1198
1199 item.ExtractVersion.Version = p[0];
1200 item.ExtractVersion.HostOS = p[1];
1201 G16(2, item.Flags);
1202 G16(4, item.Method);
1203 G32(6, item.Time);
1204 G32(10, item.Crc);
1205 G32(14, item.PackSize);
1206 G32(18, item.Size);
1207 const unsigned nameSize = Get16(p + 22);
1208 const unsigned extraSize = Get16(p + 24);
1209 bool isOkName = ReadFileName(nameSize, item.Name);
1210 item.LocalFullHeaderSize = kLocalHeaderSize + (UInt32)nameSize + extraSize;
1211 item.DescriptorWasRead = false;
1212
1213 /*
1214 if (item.IsDir())
1215 item.Size = 0; // check It
1216 */
1217
1218 if (extraSize > 0)
1219 {
1220 if (!ReadExtra(item, extraSize, item.LocalExtra, item.Size, item.PackSize, NULL))
1221 {
1222 /* Most of archives are OK for Extra. But there are some rare cases
1223 that have error. And if error in first item, it can't open archive.
1224 So we ignore that error */
1225 // return false;
1226 }
1227 }
1228
1229 if (!CheckDosTime(item.Time))
1230 {
1231 HeadersWarning = true;
1232 // return false;
1233 }
1234
1235 if (item.Name.Len() != nameSize)
1236 {
1237 // we support some "bad" zip archives that contain zeros after name
1238 if (!isOkName)
1239 return false;
1240 HeadersWarning = true;
1241 }
1242
1243 // return item.LocalFullHeaderSize <= ((UInt32)1 << 16);
1244 return true;
1245 }
1246
1247
FlagsAreSame(const CItem & i1,const CItem & i2_cd)1248 static bool FlagsAreSame(const CItem &i1, const CItem &i2_cd)
1249 {
1250 if (i1.Method != i2_cd.Method)
1251 return false;
1252
1253 UInt32 mask = i1.Flags ^ i2_cd.Flags;
1254 if (mask == 0)
1255 return true;
1256 switch (i1.Method)
1257 {
1258 case NFileHeader::NCompressionMethod::kDeflate:
1259 mask &= 0x7FF9;
1260 break;
1261 default:
1262 if (i1.Method <= NFileHeader::NCompressionMethod::kImplode)
1263 mask &= 0x7FFF;
1264 }
1265
1266 // we can ignore utf8 flag, if name is ascii, or if only cdItem has utf8 flag
1267 if (mask & NFileHeader::NFlags::kUtf8)
1268 if ((i1.Name.IsAscii() && i2_cd.Name.IsAscii())
1269 || (i2_cd.Flags & NFileHeader::NFlags::kUtf8))
1270 mask &= ~NFileHeader::NFlags::kUtf8;
1271
1272 // some bad archive in rare case can use descriptor without descriptor flag in Central Dir
1273 // if (i1.HasDescriptor())
1274 mask &= ~NFileHeader::NFlags::kDescriptorUsedMask;
1275
1276 return (mask == 0);
1277 }
1278
1279
1280 // #ifdef _WIN32
// Compares two zero-terminated paths byte by byte.
// Backslash and slash are treated as the same character.
// Returns true, if the paths are equal under that rule.
static bool AreEqualPaths_IgnoreSlashes(const char *s1, const char *s2)
{
  for (;; s1++, s2++)
  {
    char a = *s1;
    char b = *s2;
    // normalize separators before comparing
    if (a == '\\') a = '/';
    if (b == '\\') b = '/';
    if (a != b)
      return false;
    if (a == 0)
      return true;
  }
}
1301 // #endif
1302
1303
AreItemsEqual(const CItemEx & localItem,const CItemEx & cdItem)1304 static bool AreItemsEqual(const CItemEx &localItem, const CItemEx &cdItem)
1305 {
1306 if (!FlagsAreSame(localItem, cdItem))
1307 return false;
1308 if (!localItem.HasDescriptor())
1309 {
1310 if (cdItem.PackSize != localItem.PackSize
1311 || cdItem.Size != localItem.Size
1312 || (cdItem.Crc != localItem.Crc && cdItem.Crc != 0)) // some program writes 0 to crc field in central directory
1313 return false;
1314 }
1315 /* pkzip 2.50 creates incorrect archives. It uses
1316 - WIN encoding for name in local header
1317 - OEM encoding for name in central header
1318 We don't support these strange items. */
1319
1320 /* if (cdItem.Name.Len() != localItem.Name.Len())
1321 return false;
1322 */
1323 if (cdItem.Name != localItem.Name)
1324 {
1325 // #ifdef _WIN32
1326 // some xap files use backslash in central dir items.
1327 // we can ignore such errors in windows, where all slashes are converted to backslashes
1328 unsigned hostOs = cdItem.GetHostOS();
1329
1330 if (hostOs == NFileHeader::NHostOS::kFAT ||
1331 hostOs == NFileHeader::NHostOS::kNTFS)
1332 {
1333 if (!AreEqualPaths_IgnoreSlashes(cdItem.Name, localItem.Name))
1334 {
1335 // pkzip 2.50 uses DOS encoding in central dir and WIN encoding in local header.
1336 // so we ignore that error
1337 if (hostOs != NFileHeader::NHostOS::kFAT
1338 || cdItem.MadeByVersion.Version < 25
1339 || cdItem.MadeByVersion.Version > 40)
1340 return false;
1341 }
1342 }
1343 /*
1344 else
1345 #endif
1346 return false;
1347 */
1348 }
1349 return true;
1350 }
1351
1352
Read_LocalItem_After_CdItem(CItemEx & item,bool & isAvail,bool & headersError)1353 HRESULT CInArchive::Read_LocalItem_After_CdItem(CItemEx &item, bool &isAvail, bool &headersError)
1354 {
1355 isAvail = true;
1356 headersError = false;
1357 if (item.FromLocal)
1358 return S_OK;
1359 try
1360 {
1361 UInt64 offset = item.LocalHeaderPos;
1362
1363 if (IsMultiVol)
1364 {
1365 if (item.Disk >= Vols.Streams.Size())
1366 {
1367 isAvail = false;
1368 return S_FALSE;
1369 }
1370 Stream = Vols.Streams[item.Disk].Stream;
1371 Vols.StreamIndex = (int)item.Disk;
1372 if (!Stream)
1373 {
1374 isAvail = false;
1375 return S_FALSE;
1376 }
1377 }
1378 else
1379 {
1380 if (UseDisk_in_SingleVol && item.Disk != EcdVolIndex)
1381 {
1382 isAvail = false;
1383 return S_FALSE;
1384 }
1385 Stream = StreamRef;
1386
1387 offset = (UInt64)((Int64)offset + ArcInfo.Base);
1388 if (ArcInfo.Base < 0 && (Int64)offset < 0)
1389 {
1390 isAvail = false;
1391 return S_FALSE;
1392 }
1393 }
1394
1395 _inBufMode = false;
1396 RINOK(Seek_SavePos(offset))
1397 InitBuf();
1398 /*
1399 // we can use buf mode with small buffer to reduce
1400 // the number of Read() calls in ReadLocalItem()
1401 _inBufMode = true;
1402 Buffer.Alloc(1 << 10);
1403 if (!Buffer.IsAllocated())
1404 return E_OUTOFMEMORY;
1405 */
1406
1407 CItemEx localItem;
1408 if (ReadUInt32() != NSignature::kLocalFileHeader)
1409 return S_FALSE;
1410 ReadLocalItem(localItem);
1411 if (!AreItemsEqual(localItem, item))
1412 return S_FALSE;
1413 item.LocalFullHeaderSize = localItem.LocalFullHeaderSize;
1414 item.LocalExtra = localItem.LocalExtra;
1415 if (item.Crc != localItem.Crc && !localItem.HasDescriptor())
1416 {
1417 item.Crc = localItem.Crc;
1418 headersError = true;
1419 }
1420 if ((item.Flags ^ localItem.Flags) & NFileHeader::NFlags::kDescriptorUsedMask)
1421 {
1422 item.Flags = (UInt16)(item.Flags ^ NFileHeader::NFlags::kDescriptorUsedMask);
1423 headersError = true;
1424 }
1425 item.FromLocal = true;
1426 }
1427 catch(...) { return S_FALSE; }
1428 return S_OK;
1429 }
1430
1431
1432 /*
1433 ---------- FindDescriptor ----------
1434
1435 in:
1436 _streamPos : position in Stream
1437 Stream :
1438 Vols : if (IsMultiVol)
1439
1440 action:
1441 searches descriptor in input stream(s).
1442 sets
1443 item.DescriptorWasRead = true;
1444 item.Size
1445 item.PackSize
1446 item.Crc
1447 if descriptor was found
1448
1449 out:
1450 S_OK:
1451 if ( item.DescriptorWasRead) : if descriptor was found
1452 if (!item.DescriptorWasRead) : if descriptor was not found : unexpected end of stream(s)
1453
    S_FALSE: if no items or there is just one item with strange properties that doesn't look like real archive.
1455
1456 another error code: Callback error.
1457
1458 exceptions :
1459 CSystemException() : stream reading error
1460 */
1461
// Searches forward from current read position for the data descriptor of (item).
// A candidate position is accepted only if all of the following is true:
//   - it starts with NSignature::kDataDescriptor,
//   - the descriptor is followed by signature of Local or Central file header,
//   - pack size field of descriptor is equal to the number of bytes skipped before descriptor.
// (numFiles) is used only for progress callback.
HRESULT CInArchive::FindDescriptor(CItemEx &item, unsigned numFiles)
{
  // const size_t kBufSize = (size_t)1 << 5; // don't increase it too much. It reads data look ahead.

  // Buffer.Alloc(kBufSize);
  // Byte *buf = Buffer;

  // number of bytes of packed data that were already skipped in previous iterations
  UInt64 packedSize = 0;

  UInt64 progressPrev = _cnt;

  for (;;)
  {
    /* appnote specification claims that we must use 64-bit descriptor, if there is zip64 extra.
       But some old third-party xps archives used 64-bit descriptor without zip64 extra. */
    // unsigned descriptorSize = kDataDescriptorSize64 + kNextSignatureSize;

    // const unsigned kNextSignatureSize = 0;  // we can disable check for next signature
    const unsigned kNextSignatureSize = 4;  // we check also for signature for next File header

    // size of descriptor + size of signature of next header
    const unsigned descriptorSize4 = item.GetDescriptorSize() + kNextSignatureSize;

    if (descriptorSize4 > Buffer.Size()) return E_FAIL;

    // size_t processedSize;
    CanStartNewVol = true;
    // NOTE(review): LookAhead() presumably tries to make at least (descriptorSize4)
    // bytes available in Buffer - confirm against its definition
    RINOK(LookAhead(descriptorSize4))
    const size_t avail = GetAvail();

    if (avail < descriptorSize4)
    {
      // descriptor was not found: there is not enough data for descriptor and next signature.
      // item.DescriptorWasRead is not changed here.

      // we write to packSize all these available bytes.
      // later it's simpler to work with such value than with 0
      // if (item.PackSize == 0)
        item.PackSize = packedSize + avail;
      if (item.Method == 0)
        item.Size = item.PackSize;
      SkipLookahed(avail);
      return S_OK;
    }

    const Byte * const pStart = Buffer + _bufPos;
    const Byte * p = pStart;
    // (limit) is the last position, where full (descriptorSize4) bytes are available
    const Byte * const limit = pStart + (avail - descriptorSize4);

    for (; p <= limit; p++)
    {
      // descriptor signature field is Info-ZIP's extension to pkware Zip specification.
      // New ZIP specification also allows descriptorSignature.

      // NOTE(review): FindPK_4() presumably returns the next candidate position
      // in [p, limit + 1), or a pointer after (limit), if there is no candidate - confirm
      p = FindPK_4(p, limit + 1);
      if (p > limit)
        break;

      /*
      if (*p != 0x50)
        continue;
      */

      if (Get32(p) != NSignature::kDataDescriptor)
        continue;

      // we check next signature after descriptor
      // maybe we need check only 2 bytes "PK" instead of 4 bytes, if some another type of header is possible after descriptor
      const UInt32 sig = Get32(p + descriptorSize4 - kNextSignatureSize);
      if (   sig != NSignature::kLocalFileHeader
          && sig != NSignature::kCentralFileHeader)
        continue;

      // number of bytes between start of packed data and candidate descriptor
      const UInt64 packSizeCur = packedSize + (size_t)(p - pStart);
      if (descriptorSize4 == kDataDescriptorSize64 + kNextSignatureSize) // if (item.LocalExtra.IsZip64)
      {
        const UInt64 descriptorPackSize = Get64(p + 8);
        if (descriptorPackSize != packSizeCur)
          continue;
        item.Size = Get64(p + 16);
      }
      else
      {
        // only low 32 bits of pack size can be compared for 32-bit descriptor
        const UInt32 descriptorPackSize = Get32(p + 8);
        if (descriptorPackSize != (UInt32)packSizeCur)
          continue;
        item.Size = Get32(p + 12);
        // that item.Size can be truncated to 32-bit value here
      }
      // We write calculated 64-bit packSize, even if descriptor64 was not used
      item.PackSize = packSizeCur;

      item.DescriptorWasRead = true;
      item.Crc = Get32(p + 4);

      // we skip packed data and descriptor, but we don't skip signature of next header
      const size_t skip = (size_t)(p - pStart) + descriptorSize4 - kNextSignatureSize;

      SkipLookahed(skip);

      return S_OK;
    }

    // descriptor was not found in checked range.
    // we skip all checked positions, and continue the search with new look-ahead data
    const size_t skip = (size_t)(p - pStart);
    SkipLookahed(skip);

    packedSize += skip;

    // progress is reported after each (1 << 22) bytes counted in (_cnt)
    if (Callback)
    if (_cnt - progressPrev >= ((UInt32)1 << 22))
    {
      progressPrev = _cnt;
      const UInt64 numFiles64 = numFiles;
      RINOK(Callback->SetCompleted(&numFiles64, &_cnt))
    }
  }
}
1574
1575
CheckDescriptor(const CItemEx & item)1576 HRESULT CInArchive::CheckDescriptor(const CItemEx &item)
1577 {
1578 if (!item.HasDescriptor())
1579 return S_OK;
1580
1581 // pkzip's version without descriptor signature is not supported
1582
1583 bool isFinished = false;
1584 RINOK(IncreaseRealPosition(item.PackSize, isFinished))
1585 if (isFinished)
1586 return S_FALSE;
1587
1588 /*
1589 if (!IsMultiVol)
1590 {
1591 RINOK(Seek_SavePos(ArcInfo.Base + item.GetDataPosition() + item.PackSize));
1592 }
1593 */
1594
1595 Byte buf[kDataDescriptorSize64];
1596 try
1597 {
1598 CanStartNewVol = true;
1599 SafeRead(buf, item.GetDescriptorSize());
1600 }
1601 catch (const CSystemException &e) { return e.ErrorCode; }
1602 // catch (const CUnexpectEnd &)
1603 catch(...)
1604 {
1605 return S_FALSE;
1606 }
1607 // RINOK(ReadStream_FALSE(Stream, buf, item.GetDescriptorSize()));
1608
1609 if (Get32(buf) != NSignature::kDataDescriptor)
1610 return S_FALSE;
1611 UInt32 crc = Get32(buf + 4);
1612 UInt64 packSize, unpackSize;
1613
1614 if (item.LocalExtra.IsZip64)
1615 {
1616 packSize = Get64(buf + 8);
1617 unpackSize = Get64(buf + 16);
1618 }
1619 else
1620 {
1621 packSize = Get32(buf + 8);
1622 unpackSize = Get32(buf + 12);
1623 }
1624
1625 if (crc != item.Crc || item.PackSize != packSize || item.Size != unpackSize)
1626 return S_FALSE;
1627 return S_OK;
1628 }
1629
1630
Read_LocalItem_After_CdItem_Full(CItemEx & item)1631 HRESULT CInArchive::Read_LocalItem_After_CdItem_Full(CItemEx &item)
1632 {
1633 if (item.FromLocal)
1634 return S_OK;
1635 try
1636 {
1637 bool isAvail = true;
1638 bool headersError = false;
1639 RINOK(Read_LocalItem_After_CdItem(item, isAvail, headersError))
1640 if (headersError)
1641 return S_FALSE;
1642 if (item.HasDescriptor())
1643 return CheckDescriptor(item);
1644 }
1645 catch(...) { return S_FALSE; }
1646 return S_OK;
1647 }
1648
1649
ReadCdItem(CItemEx & item)1650 HRESULT CInArchive::ReadCdItem(CItemEx &item)
1651 {
1652 item.FromCentral = true;
1653 Byte p[kCentralHeaderSize - 4];
1654 SafeRead(p, kCentralHeaderSize - 4);
1655
1656 item.MadeByVersion.Version = p[0];
1657 item.MadeByVersion.HostOS = p[1];
1658 item.ExtractVersion.Version = p[2];
1659 item.ExtractVersion.HostOS = p[3];
1660 G16(4, item.Flags);
1661 G16(6, item.Method);
1662 G32(8, item.Time);
1663 G32(12, item.Crc);
1664 G32(16, item.PackSize);
1665 G32(20, item.Size);
1666 const unsigned nameSize = Get16(p + 24);
1667 const unsigned extraSize = Get16(p + 26);
1668 const unsigned commentSize = Get16(p + 28);
1669 G16(30, item.Disk);
1670 G16(32, item.InternalAttrib);
1671 G32(34, item.ExternalAttrib);
1672 G32(38, item.LocalHeaderPos);
1673 ReadFileName(nameSize, item.Name);
1674
1675 if (extraSize > 0)
1676 ReadExtra(item, extraSize, item.CentralExtra, item.Size, item.PackSize, &item);
1677
1678 // May be these strings must be deleted
1679 /*
1680 if (item.IsDir())
1681 item.Size = 0;
1682 */
1683
1684 ReadBuffer(item.Comment, commentSize);
1685 return S_OK;
1686 }
1687
1688
1689 /*
1690 TryEcd64()
1691 (_inBufMode == false) is expected here
1692 so TryEcd64() can't change the Buffer.
1693 if (Ecd64 is not covered by cached region),
1694 TryEcd64() can change cached region ranges (_bufCached, _bufPos) and _streamPos.
1695 */
1696
TryEcd64(UInt64 offset,CCdInfo & cdInfo)1697 HRESULT CInArchive::TryEcd64(UInt64 offset, CCdInfo &cdInfo)
1698 {
1699 if (offset >= ((UInt64)1 << 63))
1700 return S_FALSE;
1701 Byte buf[kEcd64_FullSize];
1702
1703 RINOK(SeekToVol(Vols.StreamIndex, offset))
1704 RINOK(ReadFromCache_FALSE(buf, kEcd64_FullSize))
1705
1706 if (Get32(buf) != NSignature::kEcd64)
1707 return S_FALSE;
1708 UInt64 mainSize = Get64(buf + 4);
1709 if (mainSize < kEcd64_MainSize || mainSize > ((UInt64)1 << 40))
1710 return S_FALSE;
1711 cdInfo.ParseEcd64e(buf + 12);
1712 return S_OK;
1713 }
1714
1715
1716 /* FindCd() doesn't use previous cached region,
1717 but it uses Buffer. So it sets new cached region */
1718
/*
  FindCd() reads up to (1 << 17) last bytes of (Stream) to Buffer, and
  searches backward for End of Central Directory record (ECD).
  For each ECD candidate it also tries to find zip64 locator and zip64 ECD.
  It writes parsed values to Vols.ecd, and it sets ArcInfo.Base.

  checkOffsetMode:
    true  : zip64 ECD placed directly before locator is accepted, even if
            its position differs from offset in locator. Also the offset
            relative to ArcInfo.MarkerPos is tried.
    false : only offsets that are equal to offset from locator are tried.

  returns:
    S_OK      : ECD was found and accepted
    S_FALSE   : ECD was not found
    E_NOTIMPL : zip64 ECD is in another disk than ECD
    another code : stream or allocation error
*/
HRESULT CInArchive::FindCd(bool checkOffsetMode)
{
  CCdInfo &cdInfo = Vols.ecd;

  UInt64 endPos;

  // There are no useful data in cache in most cases here.
  // So here we don't use cache data from previous operations .

  InitBuf();
  RINOK(InStream_GetSize_SeekToEnd(Stream, endPos))
  _streamPos = endPos;

  // const UInt32 kBufSizeMax2 = ((UInt32)1 << 16) + kEcdSize + kEcd64Locator_Size + kEcd64_FullSize;
  const size_t kBufSizeMax = ((size_t)1 << 17); // must be larger than kBufSizeMax2

  // we read the whole stream, if stream is smaller than kBufSizeMax
  const size_t bufSize = (endPos < kBufSizeMax) ? (size_t)endPos : kBufSizeMax;
  if (bufSize < kEcdSize)
    return S_FALSE;
  // CByteArr byteBuffer(bufSize);

  RINOK(AllocateBuffer(kBufSizeMax))

  RINOK(Seek_SavePos(endPos - bufSize))

  size_t processed = bufSize;
  HRESULT res = ReadStream(Stream, Buffer, &processed);
  // cached region and counters are updated even if ReadStream() returned error
  _streamPos += processed;
  _bufCached = processed;
  _bufPos = 0;
  _cnt += processed;
  if (res != S_OK)
    return res;
  if (processed != bufSize)
    return S_FALSE;


  // backward search: (i) is decremented before each check,
  // so the first checked position is (bufSize - kEcdSize)
  for (size_t i = bufSize - kEcdSize + 1;;)
  {
    if (i == 0)
      return S_FALSE;

    const Byte *buf = Buffer;

    // fast search for the first byte of signature (0x50 == 'P')
    for (;;)
    {
      i--;
      if (buf[i] == 0x50)
        break;
      if (i == 0)
        return S_FALSE;
    }

    if (Get32(buf + i) != NSignature::kEcd)
      continue;

    cdInfo.ParseEcd32(buf + i);

    // zip64 locator is expected directly before ECD
    if (i >= kEcd64Locator_Size)
    {
      const size_t locatorIndex = i - kEcd64Locator_Size;
      if (Get32(buf + locatorIndex) == NSignature::kEcd64Locator)
      {
        CLocator locator;
        locator.Parse(buf + locatorIndex + 4);
        UInt32 numDisks = locator.NumDisks;
        // we ignore the error, where some zip creators use (NumDisks == 0)
        if (numDisks == 0)
          numDisks = 1;
        if ((cdInfo.ThisDisk == numDisks - 1 || ZIP64_IS_16_MAX(cdInfo.ThisDisk))
            && locator.Ecd64Disk < numDisks)
        {
          if (locator.Ecd64Disk != cdInfo.ThisDisk && !ZIP64_IS_16_MAX(cdInfo.ThisDisk))
            return E_NOTIMPL;

          // Most of the zip64 use fixed size Zip64 ECD
          // we try relative backward reading.

          // absolute stream position of fixed size zip64 ECD placed directly before locator
          UInt64 absEcd64 = endPos - bufSize + i - (kEcd64Locator_Size + kEcd64_FullSize);

          if (locatorIndex >= kEcd64_FullSize)
          if (checkOffsetMode || absEcd64 == locator.Ecd64Offset)
          {
            const Byte *ecd64 = buf + locatorIndex - kEcd64_FullSize;
            if (Get32(ecd64) == NSignature::kEcd64)
            {
              UInt64 mainEcd64Size = Get64(ecd64 + 4);
              if (mainEcd64Size == kEcd64_MainSize)
              {
                cdInfo.ParseEcd64e(ecd64 + 12);
                // Base is the difference between real position and position from locator
                ArcInfo.Base = (Int64)(absEcd64 - locator.Ecd64Offset);
                // ArcInfo.BaseVolIndex = cdInfo.ThisDisk;
                return S_OK;
              }
            }
          }

          // some zip64 use variable size Zip64 ECD.
          // we try to use absolute offset from locator.

          if (absEcd64 != locator.Ecd64Offset)
          {
            if (TryEcd64(locator.Ecd64Offset, cdInfo) == S_OK)
            {
              ArcInfo.Base = 0;
              // ArcInfo.BaseVolIndex = cdInfo.ThisDisk;
              return S_OK;
            }
          }

          // for variable size Zip64 ECD in archives with offset != 0.

          if (checkOffsetMode
              && ArcInfo.MarkerPos != 0
              && ArcInfo.MarkerPos + locator.Ecd64Offset != absEcd64)
          {
            if (TryEcd64(ArcInfo.MarkerPos + locator.Ecd64Offset, cdInfo) == S_OK)
            {
              ArcInfo.Base = (Int64)ArcInfo.MarkerPos;
              // ArcInfo.BaseVolIndex = cdInfo.ThisDisk;
              return S_OK;
            }
          }
        }
      }
    }

    // zip64 ECD was not accepted: we use values from 32-bit ECD.
    // if (CdDisk > ThisDisk), the candidate is rejected, and backward search continues.

    // bool isVolMode = (Vols.EndVolIndex != -1);
    // UInt32 searchDisk = (isVolMode ? Vols.EndVolIndex : 0);

    if (/* searchDisk == thisDisk && */ cdInfo.CdDisk <= cdInfo.ThisDisk)
    {
      // if (isVolMode)
      {
        // central directory starts in another disk: ArcInfo.Base is not changed here
        if (cdInfo.CdDisk != cdInfo.ThisDisk)
          return S_OK;
      }

      UInt64 absEcdPos = endPos - bufSize + i;
      UInt64 cdEnd = cdInfo.Size + cdInfo.Offset;
      ArcInfo.Base = 0;
      // ArcInfo.BaseVolIndex = cdInfo.ThisDisk;
      // if ECD doesn't follow the end of central directory,
      // we suppose that the difference is the base offset of archive
      if (absEcdPos != cdEnd)
      {
        /*
        if (cdInfo.Offset <= 16 && cdInfo.Size != 0)
        {
          // here we support some rare ZIP files with Central directory at the start
          ArcInfo.Base = 0;
        }
        else
        */
        ArcInfo.Base = (Int64)(absEcdPos - cdEnd);
      }
      return S_OK;
    }
  }
}
1877
1878
TryReadCd(CObjectVector<CItemEx> & items,const CCdInfo & cdInfo,UInt64 cdOffset,UInt64 cdSize)1879 HRESULT CInArchive::TryReadCd(CObjectVector<CItemEx> &items, const CCdInfo &cdInfo, UInt64 cdOffset, UInt64 cdSize)
1880 {
1881 items.Clear();
1882 IsCdUnsorted = false;
1883
1884 // _startLocalFromCd_Disk = (UInt32)(Int32)-1;
1885 // _startLocalFromCd_Offset = (UInt64)(Int64)-1;
1886
1887 RINOK(SeekToVol(IsMultiVol ? (int)cdInfo.CdDisk : -1, cdOffset))
1888
1889 _inBufMode = true;
1890 _cnt = 0;
1891
1892 if (Callback)
1893 {
1894 RINOK(Callback->SetTotal(&cdInfo.NumEntries, IsMultiVol ? &Vols.TotalBytesSize : NULL))
1895 }
1896 UInt64 numFileExpected = cdInfo.NumEntries;
1897 const UInt64 *totalFilesPtr = &numFileExpected;
1898 bool isCorrect_NumEntries = (cdInfo.IsFromEcd64 || numFileExpected >= ((UInt32)1 << 16));
1899
1900 while (_cnt < cdSize)
1901 {
1902 CanStartNewVol = true;
1903 if (ReadUInt32() != NSignature::kCentralFileHeader)
1904 return S_FALSE;
1905 CanStartNewVol = false;
1906 {
1907 CItemEx cdItem;
1908 RINOK(ReadCdItem(cdItem))
1909
1910 /*
1911 if (cdItem.Disk < _startLocalFromCd_Disk ||
1912 cdItem.Disk == _startLocalFromCd_Disk &&
1913 cdItem.LocalHeaderPos < _startLocalFromCd_Offset)
1914 {
1915 _startLocalFromCd_Disk = cdItem.Disk;
1916 _startLocalFromCd_Offset = cdItem.LocalHeaderPos;
1917 }
1918 */
1919
1920 if (items.Size() > 0 && !IsCdUnsorted)
1921 {
1922 const CItemEx &prev = items.Back();
1923 if (cdItem.Disk < prev.Disk
1924 || (cdItem.Disk == prev.Disk &&
1925 cdItem.LocalHeaderPos < prev.LocalHeaderPos))
1926 IsCdUnsorted = true;
1927 }
1928
1929 items.Add(cdItem);
1930 }
1931 if (Callback && (items.Size() & 0xFFF) == 0)
1932 {
1933 const UInt64 numFiles = items.Size();
1934
1935 if (numFiles > numFileExpected && totalFilesPtr)
1936 {
1937 if (isCorrect_NumEntries)
1938 totalFilesPtr = NULL;
1939 else
1940 while (numFiles > numFileExpected)
1941 numFileExpected += (UInt32)1 << 16;
1942 RINOK(Callback->SetTotal(totalFilesPtr, NULL))
1943 }
1944
1945 RINOK(Callback->SetCompleted(&numFiles, &_cnt))
1946 }
1947 }
1948
1949 CanStartNewVol = true;
1950
1951 return (_cnt == cdSize) ? S_OK : S_FALSE;
1952 }
1953
1954
1955 /*
1956 static int CompareCdItems(void *const *elem1, void *const *elem2, void *)
1957 {
1958 const CItemEx *i1 = *(const CItemEx **)elem1;
1959 const CItemEx *i2 = *(const CItemEx **)elem2;
1960
1961 if (i1->Disk < i2->Disk) return -1;
1962 if (i1->Disk > i2->Disk) return 1;
1963 if (i1->LocalHeaderPos < i2->LocalHeaderPos) return -1;
1964 if (i1->LocalHeaderPos > i2->LocalHeaderPos) return 1;
1965 if (i1 < i2) return -1;
1966 if (i1 > i2) return 1;
1967 return 0;
1968 }
1969 */
1970
ReadCd(CObjectVector<CItemEx> & items,UInt32 & cdDisk,UInt64 & cdOffset,UInt64 & cdSize)1971 HRESULT CInArchive::ReadCd(CObjectVector<CItemEx> &items, UInt32 &cdDisk, UInt64 &cdOffset, UInt64 &cdSize)
1972 {
1973 bool checkOffsetMode = true;
1974
1975 if (IsMultiVol)
1976 {
1977 if (Vols.EndVolIndex == -1)
1978 return S_FALSE;
1979 Stream = Vols.Streams[(unsigned)Vols.EndVolIndex].Stream;
1980 if (!Vols.StartIsZip)
1981 checkOffsetMode = false;
1982 }
1983 else
1984 Stream = StartStream;
1985
1986 if (!Vols.ecd_wasRead)
1987 {
1988 RINOK(FindCd(checkOffsetMode))
1989 }
1990
1991 CCdInfo &cdInfo = Vols.ecd;
1992
1993 HRESULT res = S_FALSE;
1994
1995 cdSize = cdInfo.Size;
1996 cdOffset = cdInfo.Offset;
1997 cdDisk = cdInfo.CdDisk;
1998
1999 if (!IsMultiVol)
2000 {
2001 if (cdInfo.ThisDisk != cdInfo.CdDisk)
2002 return S_FALSE;
2003 }
2004
2005 const UInt64 base = (IsMultiVol ? 0 : (UInt64)ArcInfo.Base);
2006 res = TryReadCd(items, cdInfo, base + cdOffset, cdSize);
2007
2008 if (res == S_FALSE && !IsMultiVol && base != ArcInfo.MarkerPos)
2009 {
2010 // do we need that additional attempt to read cd?
2011 res = TryReadCd(items, cdInfo, ArcInfo.MarkerPos + cdOffset, cdSize);
2012 if (res == S_OK)
2013 ArcInfo.Base = (Int64)ArcInfo.MarkerPos;
2014 }
2015
2016 // Some rare case files are unsorted
2017 // items.Sort(CompareCdItems, NULL);
2018 return res;
2019 }
2020
2021
FindItem(const CObjectVector<CItemEx> & items,const CItemEx & item)2022 static int FindItem(const CObjectVector<CItemEx> &items, const CItemEx &item)
2023 {
2024 unsigned left = 0, right = items.Size();
2025 for (;;)
2026 {
2027 if (left >= right)
2028 return -1;
2029 const unsigned index = (unsigned)(((size_t)left + (size_t)right) / 2);
2030 const CItemEx &item2 = items[index];
2031 if (item.Disk < item2.Disk)
2032 right = index;
2033 else if (item.Disk > item2.Disk)
2034 left = index + 1;
2035 else if (item.LocalHeaderPos == item2.LocalHeaderPos)
2036 return (int)index;
2037 else if (item.LocalHeaderPos < item2.LocalHeaderPos)
2038 right = index;
2039 else
2040 left = index + 1;
2041 }
2042 }
2043
IsStrangeItem(const CItem & item)2044 static bool IsStrangeItem(const CItem &item)
2045 {
2046 return item.Name.Len() > (1 << 14) || item.Method > (1 << 8);
2047 }
2048
2049
2050
2051 /*
2052 ---------- ReadLocals ----------
2053
2054 in:
2055 (_signature == NSignature::kLocalFileHeader)
2056 VirtStreamPos : after _signature : position in Stream
2057 Stream :
2058 Vols : if (IsMultiVol)
2059 (_inBufMode == false)
2060
2061 action:
2062 it parses local items.
2063
2064 if ( IsMultiVol) it writes absolute offsets to CItemEx::LocalHeaderPos
2065 if (!IsMultiVol) it writes relative (from ArcInfo.Base) offsets to CItemEx::LocalHeaderPos
2066 later we can correct CItemEx::LocalHeaderPos values, if
2067 some new value for ArcInfo.Base will be detected
2068 out:
2069 S_OK:
      (_signature != NSignature::kLocalFileHeader)
2071 _streamPos : after _signature
2072
    S_FALSE: if no items or there is just one item with strange properties that doesn't look like real archive.
2074
2075 another error code: stream reading error or Callback error.
2076
2077 CUnexpectEnd() exception : it's not fatal exception here.
2078 It means that reading was interrupted by unexpected end of input stream,
2079 but some CItemEx items were parsed OK.
2080 We can stop further archive parsing.
2081 But we can use all filled CItemEx items.
2082 */
2083
ReadLocals(CObjectVector<CItemEx> & items)2084 HRESULT CInArchive::ReadLocals(CObjectVector<CItemEx> &items)
2085 {
2086 items.Clear();
2087
2088 UInt64 progressPrev = _cnt;
2089
2090 if (Callback)
2091 {
2092 RINOK(Callback->SetTotal(NULL, IsMultiVol ? &Vols.TotalBytesSize : NULL))
2093 }
2094
2095 while (_signature == NSignature::kLocalFileHeader)
2096 {
2097 CItemEx item;
2098
2099 item.LocalHeaderPos = GetVirtStreamPos() - 4;
2100 if (!IsMultiVol)
2101 item.LocalHeaderPos = (UInt64)((Int64)item.LocalHeaderPos - ArcInfo.Base);
2102
2103 try
2104 {
2105 ReadLocalItem(item);
2106 item.FromLocal = true;
2107 bool isFinished = false;
2108
2109 if (item.HasDescriptor())
2110 {
2111 RINOK(FindDescriptor(item, items.Size()))
2112 isFinished = !item.DescriptorWasRead;
2113 }
2114 else
2115 {
2116 if (item.PackSize >= ((UInt64)1 << 62))
2117 throw CUnexpectEnd();
2118 RINOK(IncreaseRealPosition(item.PackSize, isFinished))
2119 }
2120
2121 items.Add(item);
2122
2123 if (isFinished)
2124 throw CUnexpectEnd();
2125
2126 ReadSignature();
2127 }
2128 catch (CUnexpectEnd &)
2129 {
2130 if (items.IsEmpty() || (items.Size() == 1 && IsStrangeItem(items[0])))
2131 return S_FALSE;
2132 throw;
2133 }
2134
2135
2136 if (Callback)
2137 if ((items.Size() & 0xFF) == 0
2138 || _cnt - progressPrev >= ((UInt32)1 << 22))
2139 {
2140 progressPrev = _cnt;
2141 const UInt64 numFiles = items.Size();
2142 RINOK(Callback->SetCompleted(&numFiles, &_cnt))
2143 }
2144 }
2145
2146 if (items.Size() == 1 && _signature != NSignature::kCentralFileHeader)
2147 if (IsStrangeItem(items[0]))
2148 return S_FALSE;
2149
2150 return S_OK;
2151 }
2152
2153
2154
ParseArcName(IArchiveOpenVolumeCallback * volCallback)2155 HRESULT CVols::ParseArcName(IArchiveOpenVolumeCallback *volCallback)
2156 {
2157 UString name;
2158 {
2159 NWindows::NCOM::CPropVariant prop;
2160 RINOK(volCallback->GetProperty(kpidName, &prop))
2161 if (prop.vt != VT_BSTR)
2162 return S_OK;
2163 name = prop.bstrVal;
2164 }
2165
2166 const int dotPos = name.ReverseFind_Dot();
2167 if (dotPos < 0)
2168 return S_OK;
2169 const UString ext = name.Ptr((unsigned)(dotPos + 1));
2170 name.DeleteFrom((unsigned)(dotPos + 1));
2171
2172 StartVolIndex = (Int32)(-1);
2173
2174 if (ext.IsEmpty())
2175 return S_OK;
2176 {
2177 wchar_t c = ext[0];
2178 IsUpperCase = (c >= 'A' && c <= 'Z');
2179 if (ext.IsEqualTo_Ascii_NoCase("zip"))
2180 {
2181 BaseName = name;
2182 StartIsZ = true;
2183 StartIsZip = true;
2184 return S_OK;
2185 }
2186 else if (ext.IsEqualTo_Ascii_NoCase("exe"))
2187 {
2188 /* possible cases:
2189 - exe with zip inside
2190 - sfx: a.exe, a.z02, a.z03,... , a.zip
2191 a.exe is start volume.
2192 - zip renamed to exe
2193 */
2194
2195 StartIsExe = true;
2196 BaseName = name;
2197 StartVolIndex = 0;
2198 /* sfx-zip can use both arc.exe and arc.zip
2199 We can open arc.zip, if it was requesed to open arc.exe.
2200 But it's possible that arc.exe and arc.zip are not parts of same archive.
2201 So we can disable such operation */
2202
2203 // 18.04: we still want to open zip renamed to exe.
2204 /*
2205 {
2206 UString volName = name;
2207 volName += IsUpperCase ? "Z01" : "z01";
2208 {
2209 CMyComPtr<IInStream> stream;
2210 HRESULT res2 = volCallback->GetStream(volName, &stream);
2211 if (res2 == S_OK)
2212 DisableVolsSearch = true;
2213 }
2214 }
2215 */
2216 DisableVolsSearch = true;
2217 return S_OK;
2218 }
2219 else if (ext[0] == 'z' || ext[0] == 'Z')
2220 {
2221 if (ext.Len() < 3)
2222 return S_OK;
2223 const wchar_t *end = NULL;
2224 UInt32 volNum = ConvertStringToUInt32(ext.Ptr(1), &end);
2225 if (*end != 0 || volNum < 1 || volNum > ((UInt32)1 << 30))
2226 return S_OK;
2227 StartVolIndex = (Int32)(volNum - 1);
2228 BaseName = name;
2229 StartIsZ = true;
2230 }
2231 else
2232 return S_OK;
2233 }
2234
2235 UString volName = BaseName;
2236 volName += (IsUpperCase ? "ZIP" : "zip");
2237
2238 HRESULT res = volCallback->GetStream(volName, &ZipStream);
2239
2240 if (res == S_FALSE || !ZipStream)
2241 {
2242 if (MissingName.IsEmpty())
2243 {
2244 MissingZip = true;
2245 MissingName = volName;
2246 }
2247 return S_OK;
2248 }
2249
2250 return res;
2251 }
2252
2253
ReadVols2(IArchiveOpenVolumeCallback * volCallback,unsigned start,int lastDisk,int zipDisk,unsigned numMissingVolsMax,unsigned & numMissingVols)2254 HRESULT CInArchive::ReadVols2(IArchiveOpenVolumeCallback *volCallback,
2255 unsigned start, int lastDisk, int zipDisk, unsigned numMissingVolsMax, unsigned &numMissingVols)
2256 {
2257 if (Vols.DisableVolsSearch)
2258 return S_OK;
2259
2260 numMissingVols = 0;
2261
2262 for (unsigned i = start;; i++)
2263 {
2264 if (lastDisk >= 0 && i >= (unsigned)lastDisk)
2265 break;
2266
2267 if (i < Vols.Streams.Size())
2268 if (Vols.Streams[i].Stream)
2269 continue;
2270
2271 CMyComPtr<IInStream> stream;
2272
2273 if ((int)i == zipDisk)
2274 {
2275 stream = Vols.ZipStream;
2276 }
2277 else if ((int)i == Vols.StartVolIndex)
2278 {
2279 stream = StartStream;
2280 }
2281 else
2282 {
2283 UString volName = Vols.BaseName;
2284 {
2285 volName.Add_Char(Vols.IsUpperCase ? 'Z' : 'z');
2286 const unsigned v = i + 1;
2287 if (v < 10)
2288 volName.Add_Char('0');
2289 volName.Add_UInt32(v);
2290 }
2291
2292 HRESULT res = volCallback->GetStream(volName, &stream);
2293 if (res != S_OK && res != S_FALSE)
2294 return res;
2295 if (res == S_FALSE || !stream)
2296 {
2297 if (i == 0)
2298 {
2299 UString volName_exe = Vols.BaseName;
2300 volName_exe += (Vols.IsUpperCase ? "EXE" : "exe");
2301
2302 HRESULT res2 = volCallback->GetStream(volName_exe, &stream);
2303 if (res2 != S_OK && res2 != S_FALSE)
2304 return res2;
2305 res = res2;
2306 }
2307 }
2308 if (res == S_FALSE || !stream)
2309 {
2310 if (i == 1 && Vols.StartIsExe)
2311 return S_OK;
2312 if (Vols.MissingName.IsEmpty())
2313 Vols.MissingName = volName;
2314 numMissingVols++;
2315 if (numMissingVols > numMissingVolsMax)
2316 return S_OK;
2317 if (lastDisk == -1 && numMissingVols != 0)
2318 return S_OK;
2319 continue;
2320 }
2321 }
2322
2323 UInt64 pos, size;
2324 RINOK(InStream_GetPos_GetSize(stream, pos, size))
2325
2326 while (i >= Vols.Streams.Size())
2327 Vols.Streams.AddNew();
2328
2329 CVols::CSubStreamInfo &ss = Vols.Streams[i];
2330 Vols.NumVols++;
2331 Vols.TotalBytesSize += size;
2332
2333 ss.Stream = stream;
2334 ss.Size = size;
2335
2336 if ((int)i == zipDisk)
2337 {
2338 Vols.EndVolIndex = (int)(Vols.Streams.Size() - 1);
2339 break;
2340 }
2341 }
2342
2343 return S_OK;
2344 }
2345
2346
/*
  ReadVols() tries to detect multivolume archive and to open its volumes.
    - it gets IArchiveOpenVolumeCallback from Callback,
    - it parses the name of archive: Vols.ParseArcName(),
    - if stream of last (.zip) volume is available, it calls FindCd() for that stream
      to get the index of that volume (zipDisk) and the index of volume with cd (cdDisk),
    - it requests another volumes with ReadVols2().
  IsMultiVol is set, if Vols.Streams is not empty at the end.
  It returns S_OK also for all cases, where multivolume mode is not used.

  NOTE(review): Callback is used here without NULL check -
  presumably the caller guarantees that Callback is not NULL - confirm.
*/
HRESULT CInArchive::ReadVols()
{
  CMyComPtr<IArchiveOpenVolumeCallback> volCallback;

  Callback->QueryInterface(IID_IArchiveOpenVolumeCallback, (void **)&volCallback);
  if (!volCallback)
    return S_OK;

  RINOK(Vols.ParseArcName(volCallback))

  // const int startZIndex = Vols.StartVolIndex;

  // only zip / zNN / exe names are processed as possible multivolume archive
  if (!Vols.StartIsZ)
  {
    if (!Vols.StartIsExe)
      return S_OK;
  }

  int zipDisk = -1; // index of last (zip) volume, or -1, if it's unknown
  int cdDisk = -1;  // index of volume, where central directory starts, or -1, if it's unknown

  if (Vols.StartIsZip)
    Vols.ZipStream = StartStream;

  if (Vols.ZipStream)
  {
    Stream = Vols.ZipStream;

    if (Vols.StartIsZip)
      Vols.StreamIndex = -1;
    else
    {
      Vols.StreamIndex = -2;
      InitBuf();
    }

    HRESULT res = FindCd(true);

    CCdInfo &ecd = Vols.ecd;
    if (res == S_OK)
    {
      zipDisk = (int)ecd.ThisDisk;
      Vols.ecd_wasRead = true;

      // if is not multivol or bad multivol, we return to main single stream code
      if (ecd.ThisDisk == 0
          || ecd.ThisDisk >= ((UInt32)1 << 30)
          || ecd.ThisDisk < ecd.CdDisk)
        return S_OK;

      cdDisk = (int)ecd.CdDisk;
      if (Vols.StartVolIndex < 0)
        Vols.StartVolIndex = (Int32)ecd.ThisDisk;
      else if ((UInt32)Vols.StartVolIndex >= ecd.ThisDisk)
        return S_OK;

      // Vols.StartVolIndex = ecd.ThisDisk;
      // Vols.EndVolIndex = ecd.ThisDisk;
      unsigned numMissingVols;
      if (cdDisk != zipDisk)
      {
        // get volumes required for cd.
        RINOK(ReadVols2(volCallback, (unsigned)cdDisk, zipDisk, zipDisk, 0, numMissingVols))
        if (numMissingVols != 0)
        {
          // cdOK = false;
        }
      }
    }
    else if (res != S_FALSE)
      return res;
    // (res == S_FALSE) : ECD was not found. We continue with unknown zipDisk and cdDisk
  }

  if (Vols.StartVolIndex < 0)
  {
    // is not multivol;
    return S_OK;
  }

  /*
  if (!Vols.Streams.IsEmpty())
    IsMultiVol = true;
  */

  unsigned numMissingVols;

  if (cdDisk != 0)
  {
    // get volumes that were not requested yet:
    // volumes before cdDisk, or all volumes from 0, if cdDisk is unknown
    const unsigned kNumMissingVolsMax = 1 << 12;
    RINOK(ReadVols2(volCallback, 0, cdDisk < 0 ? -1 : cdDisk, zipDisk, kNumMissingVolsMax, numMissingVols))
  }

  // if (Vols.StartVolIndex >= 0)
  {
    // if there are no another volumes and start volume index is too big, we don't use multivol mode
    if (Vols.Streams.IsEmpty())
      if (Vols.StartVolIndex > (1 << 20))
        return S_OK;
    if ((unsigned)Vols.StartVolIndex >= Vols.Streams.Size()
        || !Vols.Streams[(unsigned)Vols.StartVolIndex].Stream)
    {
      // we get volumes starting from StartVolIndex, if they were not requested before
      RINOK(ReadVols2(volCallback, (unsigned)Vols.StartVolIndex, zipDisk, zipDisk, 0, numMissingVols))
    }
  }

  if (Vols.ZipStream)
  {
    // if there is no another volumes and volumeIndex is too big, we don't use multivol mode
    if (Vols.Streams.IsEmpty())
      if (zipDisk > (1 << 10))
        return S_OK;
    if (zipDisk >= 0)
    {
      // we create item in Streams for ZipStream, if we know the volume index (if FindCd() was ok)
      RINOK(ReadVols2(volCallback, (unsigned)zipDisk, zipDisk + 1, zipDisk, 0, numMissingVols))
    }
  }

  if (!Vols.Streams.IsEmpty())
  {
    IsMultiVol = true;
    /*
    if (cdDisk)
      IsMultiVol = true;
    */
    const int startZIndex = Vols.StartVolIndex;
    if (startZIndex >= 0)
    {
      // if all volumes up to start volume are OK, we can start parsing from 0
      // if there are missing volumes up to startZIndex, we start parsing in current startZIndex
      if ((unsigned)startZIndex < Vols.Streams.Size())
      {
        // the range of check includes startZIndex itself
        for (unsigned i = 0; i <= (unsigned)startZIndex; i++)
          if (!Vols.Streams[i].Stream)
          {
            Vols.StartParsingVol = startZIndex;
            break;
          }
      }
    }
  }

  return S_OK;
}
2492
2493
2494
Read(void * data,UInt32 size,UInt32 * processedSize)2495 HRESULT CVols::Read(void *data, UInt32 size, UInt32 *processedSize)
2496 {
2497 if (processedSize)
2498 *processedSize = 0;
2499 if (size == 0)
2500 return S_OK;
2501
2502 for (;;)
2503 {
2504 if (StreamIndex < 0)
2505 return S_OK;
2506 if ((unsigned)StreamIndex >= Streams.Size())
2507 return S_OK;
2508 const CVols::CSubStreamInfo &s = Streams[(unsigned)StreamIndex];
2509 if (!s.Stream)
2510 return S_FALSE;
2511 if (NeedSeek)
2512 {
2513 RINOK(s.SeekToStart())
2514 NeedSeek = false;
2515 }
2516 UInt32 realProcessedSize = 0;
2517 HRESULT res = s.Stream->Read(data, size, &realProcessedSize);
2518 if (processedSize)
2519 *processedSize = realProcessedSize;
2520 if (res != S_OK)
2521 return res;
2522 if (realProcessedSize != 0)
2523 return res;
2524 StreamIndex++;
2525 NeedSeek = true;
2526 }
2527 }
2528
// stream interface wrapper: all the work is done by CVols::Read() of (Vols) object.
Z7_COM7F_IMF(CVolStream::Read(void *data, UInt32 size, UInt32 *processedSize))
{
  const HRESULT readRes = Vols->Read(data, size, processedSize);
  return readRes;
}
2533
2534
2535
2536
/*
  COPY_ECD_ITEM_16(n) / COPY_ECD_ITEM_32(n):
    copy field (n) from (ecd) to (cdInfo).
    In zip64 mode the field is not copied, if it contains
    the 16-bit / 32-bit MAX marker value.
  The macros expect (isZip64), (ecd) and (cdInfo) in scope of caller.
  Each macro expands to full statement (with trailing ';'),
  so the callers don't write ';' after it.
*/
#define COPY_ECD_ITEM_16(n) \
    if (!isZip64 || !ZIP64_IS_16_MAX(ecd. n)) \
      cdInfo. n = ecd. n;

#define COPY_ECD_ITEM_32(n) \
    if (!isZip64 || !ZIP64_IS_32_MAX(ecd. n)) \
      cdInfo. n = ecd. n;
2539
2540
ReadHeaders(CObjectVector<CItemEx> & items)2541 HRESULT CInArchive::ReadHeaders(CObjectVector<CItemEx> &items)
2542 {
2543 // buffer that can be used for cd reading
2544 RINOK(AllocateBuffer(kSeqBufferSize))
2545
2546 // here we can read small records. So we switch off _inBufMode.
2547 _inBufMode = false;
2548
2549 HRESULT res = S_OK;
2550
2551 bool localsWereRead = false;
2552
2553 /* we try to open archive with the following modes:
2554 1) CD-MODE : fast mode : we read backward ECD and CD, compare CD items with first Local item.
2555 2) LOCALS-CD-MODE : slow mode, if CD-MODE fails : we sequentially read all Locals and then CD.
2556 Then we read sequentially ECD64, Locator, ECD again at the end.
2557
2558 - in LOCALS-CD-MODE we use use the following
2559 variables (with real cd properties) to set Base archive offset
2560 and check real cd properties with values from ECD/ECD64.
2561 */
2562
2563 UInt64 cdSize = 0;
2564 UInt64 cdRelatOffset = 0;
2565 UInt32 cdDisk = 0;
2566
2567 UInt64 cdAbsOffset = 0; // absolute cd offset, for LOCALS-CD-MODE only.
2568
2569 if (Force_ReadLocals_Mode)
2570 {
2571 IsArc = true;
2572 res = S_FALSE; // we will use LOCALS-CD-MODE mode
2573 }
2574 else
2575 {
2576 if (!MarkerIsFound || !MarkerIsSafe)
2577 {
2578 IsArc = true;
2579 res = ReadCd(items, cdDisk, cdRelatOffset, cdSize);
2580 if (res == S_OK)
2581 ReadSignature();
2582 else if (res != S_FALSE)
2583 return res;
2584 }
2585 else // (MarkerIsFound && MarkerIsSafe)
2586 {
2587
2588 // _signature must be kLocalFileHeader or kEcd or kEcd64
2589
2590 SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2 + 4);
2591
2592 CanStartNewVol = false;
2593
2594 if (_signature == NSignature::kEcd64)
2595 {
2596 // UInt64 ecd64Offset = GetVirtStreamPos() - 4;
2597 IsZip64 = true;
2598
2599 {
2600 const UInt64 recordSize = ReadUInt64();
2601 if (recordSize < kEcd64_MainSize)
2602 return S_FALSE;
2603 if (recordSize >= ((UInt64)1 << 62))
2604 return S_FALSE;
2605
2606 {
2607 const unsigned kBufSize = kEcd64_MainSize;
2608 Byte buf[kBufSize];
2609 SafeRead(buf, kBufSize);
2610 CCdInfo cdInfo;
2611 cdInfo.ParseEcd64e(buf);
2612 if (!cdInfo.IsEmptyArc())
2613 return S_FALSE;
2614 }
2615
2616 RINOK(Skip64(recordSize - kEcd64_MainSize, 0))
2617 }
2618
2619 ReadSignature();
2620 if (_signature != NSignature::kEcd64Locator)
2621 return S_FALSE;
2622
2623 {
2624 const unsigned kBufSize = 16;
2625 Byte buf[kBufSize];
2626 SafeRead(buf, kBufSize);
2627 CLocator locator;
2628 locator.Parse(buf);
2629 if (!locator.IsEmptyArc())
2630 return S_FALSE;
2631 }
2632
2633 ReadSignature();
2634 if (_signature != NSignature::kEcd)
2635 return S_FALSE;
2636 }
2637
2638 if (_signature == NSignature::kEcd)
2639 {
2640 // It must be empty archive or backware archive
2641 // we don't support backware archive still
2642
2643 const unsigned kBufSize = kEcdSize - 4;
2644 Byte buf[kBufSize];
2645 SafeRead(buf, kBufSize);
2646 CEcd ecd;
2647 ecd.Parse(buf);
2648 // if (ecd.cdSize != 0)
2649 // Do we need also to support the case where empty zip archive with PK00 uses cdOffset = 4 ??
2650 if (!ecd.IsEmptyArc())
2651 return S_FALSE;
2652
2653 ArcInfo.Base = (Int64)ArcInfo.MarkerPos;
2654 IsArc = true; // check it: we need more tests?
2655
2656 RINOK(SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2))
2657 ReadSignature();
2658 }
2659 else
2660 {
2661 CItemEx firstItem;
2662 try
2663 {
2664 try
2665 {
2666 if (!ReadLocalItem(firstItem))
2667 return S_FALSE;
2668 }
2669 catch(CUnexpectEnd &)
2670 {
2671 return S_FALSE;
2672 }
2673
2674 IsArc = true;
2675 res = ReadCd(items, cdDisk, cdRelatOffset, cdSize);
2676 if (res == S_OK)
2677 ReadSignature();
2678 }
2679 catch(CUnexpectEnd &) { res = S_FALSE; }
2680
2681 if (res != S_FALSE && res != S_OK)
2682 return res;
2683
2684 if (res == S_OK && items.Size() == 0)
2685 res = S_FALSE;
2686
2687 if (res == S_OK)
2688 {
2689 // we can't read local items here to keep _inBufMode state
2690 if ((Int64)ArcInfo.MarkerPos2 < ArcInfo.Base)
2691 res = S_FALSE;
2692 else
2693 {
2694 firstItem.LocalHeaderPos = (UInt64)((Int64)ArcInfo.MarkerPos2 - ArcInfo.Base);
2695 int index = -1;
2696
2697 UInt32 min_Disk = (UInt32)(Int32)-1;
2698 UInt64 min_LocalHeaderPos = (UInt64)(Int64)-1;
2699
2700 if (!IsCdUnsorted)
2701 index = FindItem(items, firstItem);
2702 else
2703 {
2704 FOR_VECTOR (i, items)
2705 {
2706 const CItemEx &cdItem = items[i];
2707 if (cdItem.Disk == firstItem.Disk
2708 && (cdItem.LocalHeaderPos == firstItem.LocalHeaderPos))
2709 index = (int)i;
2710
2711 if (i == 0
2712 || cdItem.Disk < min_Disk
2713 || (cdItem.Disk == min_Disk && cdItem.LocalHeaderPos < min_LocalHeaderPos))
2714 {
2715 min_Disk = cdItem.Disk;
2716 min_LocalHeaderPos = cdItem.LocalHeaderPos;
2717 }
2718 }
2719 }
2720
2721 if (index == -1)
2722 res = S_FALSE;
2723 else if (!AreItemsEqual(firstItem, items[(unsigned)index]))
2724 res = S_FALSE;
2725 else
2726 {
2727 ArcInfo.CdWasRead = true;
2728 if (IsCdUnsorted)
2729 ArcInfo.FirstItemRelatOffset = min_LocalHeaderPos;
2730 else
2731 ArcInfo.FirstItemRelatOffset = items[0].LocalHeaderPos;
2732
2733 // ArcInfo.FirstItemRelatOffset = _startLocalFromCd_Offset;
2734 }
2735 }
2736 }
2737 }
2738 } // (MarkerIsFound && MarkerIsSafe)
2739
2740 } // (!onlyLocalsMode)
2741
2742
2743 CObjectVector<CItemEx> cdItems;
2744
2745 bool needSetBase = false; // we set needSetBase only for LOCALS_CD_MODE
2746 unsigned numCdItems = items.Size();
2747
2748 #ifdef ZIP_SELF_CHECK
2749 res = S_FALSE; // if uncommented, it uses additional LOCALS-CD-MODE mode to check the code
2750 #endif
2751
2752 if (res != S_OK)
2753 {
2754 // ---------- LOCALS-CD-MODE ----------
2755 // CD doesn't match firstItem,
2756 // so we clear items and read Locals and CD.
2757
2758 items.Clear();
2759 localsWereRead = true;
2760
2761 HeadersError = false;
2762 HeadersWarning = false;
2763 ExtraMinorError = false;
2764
2765 /* we can use any mode: with buffer and without buffer
2766 without buffer : skips packed data : fast for big files : slow for small files
2767 with buffer : reads packed data : slow for big files : fast for small files
2768 Buffer mode is more effective. */
2769 // _inBufMode = false;
2770 _inBufMode = true;
2771 // we could change the buffer size here, if we want smaller Buffer.
2772 // RINOK(ReAllocateBuffer(kSeqBufferSize));
2773 // InitBuf()
2774
2775 ArcInfo.Base = 0;
2776
2777 if (!Disable_FindMarker)
2778 {
2779 if (!MarkerIsFound)
2780 {
2781 if (!IsMultiVol)
2782 return S_FALSE;
2783 if (Vols.StartParsingVol != 0)
2784 return S_FALSE;
2785 // if (StartParsingVol == 0) and we didn't find marker, we use default zero marker.
2786 // so we suppose that there is no sfx stub
2787 RINOK(SeekToVol(0, ArcInfo.MarkerPos2))
2788 }
2789 else
2790 {
2791 if (ArcInfo.MarkerPos != 0)
2792 {
2793 /*
2794 If multi-vol or there is (No)Span-marker at start of stream, we set (Base) as 0.
2795 In another caes:
2796 (No)Span-marker is supposed as false positive. So we set (Base) as main marker (MarkerPos2).
2797 The (Base) can be corrected later after ECD reading.
2798 But sfx volume with stub and (No)Span-marker in (!IsMultiVol) mode will have incorrect (Base) here.
2799 */
2800 ArcInfo.Base = (Int64)ArcInfo.MarkerPos2;
2801 }
2802 RINOK(SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2))
2803 }
2804 }
2805 _cnt = 0;
2806
2807 ReadSignature();
2808
2809 LocalsWereRead = true;
2810
2811 RINOK(ReadLocals(items))
2812
2813 if (_signature != NSignature::kCentralFileHeader)
2814 {
2815 // GetVirtStreamPos() - 4
2816 if (items.IsEmpty())
2817 return S_FALSE;
2818
2819 bool isError = true;
2820
2821 const UInt32 apkSize = _signature;
2822 const unsigned kApkFooterSize = 16 + 8;
2823 if (apkSize >= kApkFooterSize && apkSize <= (1 << 20))
2824 {
2825 if (ReadUInt32() == 0)
2826 {
2827 CByteBuffer apk;
2828 apk.Alloc(apkSize);
2829 SafeRead(apk, apkSize);
2830 ReadSignature();
2831 const Byte *footer = apk + apkSize - kApkFooterSize;
2832 if (_signature == NSignature::kCentralFileHeader)
2833 if (GetUi64(footer) == apkSize)
2834 if (memcmp(footer + 8, "APK Sig Block 42", 16) == 0)
2835 {
2836 isError = false;
2837 IsApk = true;
2838 }
2839 }
2840 }
2841
2842 if (isError)
2843 {
2844 NoCentralDir = true;
2845 HeadersError = true;
2846 return S_OK;
2847 }
2848 }
2849
2850 _inBufMode = true;
2851
2852 cdAbsOffset = GetVirtStreamPos() - 4;
2853 cdDisk = (UInt32)Vols.StreamIndex;
2854
2855 #ifdef ZIP_SELF_CHECK
2856 if (!IsMultiVol && _cnt != GetVirtStreamPos() - ArcInfo.MarkerPos2)
2857 return E_FAIL;
2858 #endif
2859
2860 const UInt64 processedCnt_start = _cnt;
2861
2862 for (;;)
2863 {
2864 CItemEx cdItem;
2865
2866 RINOK(ReadCdItem(cdItem))
2867
2868 cdItems.Add(cdItem);
2869 if (Callback && (cdItems.Size() & 0xFFF) == 0)
2870 {
2871 const UInt64 numFiles = items.Size();
2872 const UInt64 numBytes = _cnt;
2873 RINOK(Callback->SetCompleted(&numFiles, &numBytes))
2874 }
2875 ReadSignature();
2876 if (_signature != NSignature::kCentralFileHeader)
2877 break;
2878 }
2879
2880 cdSize = _cnt - processedCnt_start;
2881
2882 #ifdef ZIP_SELF_CHECK
2883 if (!IsMultiVol)
2884 {
2885 if (_cnt != GetVirtStreamPos() - ArcInfo.MarkerPos2)
2886 return E_FAIL;
2887 if (cdSize != (GetVirtStreamPos() - 4) - cdAbsOffset)
2888 return E_FAIL;
2889 }
2890 #endif
2891
2892 needSetBase = true;
2893 numCdItems = cdItems.Size();
2894 cdRelatOffset = (UInt64)((Int64)cdAbsOffset - ArcInfo.Base);
2895
2896 if (!cdItems.IsEmpty())
2897 {
2898 ArcInfo.CdWasRead = true;
2899 ArcInfo.FirstItemRelatOffset = cdItems[0].LocalHeaderPos;
2900 }
2901 }
2902
2903
2904
2905 CCdInfo cdInfo;
2906 CLocator locator;
2907 bool isZip64 = false;
2908 const UInt64 ecd64AbsOffset = GetVirtStreamPos() - 4;
2909 int ecd64Disk = -1;
2910
2911 if (_signature == NSignature::kEcd64)
2912 {
2913 ecd64Disk = Vols.StreamIndex;
2914
2915 IsZip64 = isZip64 = true;
2916
2917 {
2918 const UInt64 recordSize = ReadUInt64();
2919 if (recordSize < kEcd64_MainSize
2920 || recordSize >= ((UInt64)1 << 62))
2921 {
2922 HeadersError = true;
2923 return S_OK;
2924 }
2925
2926 {
2927 const unsigned kBufSize = kEcd64_MainSize;
2928 Byte buf[kBufSize];
2929 SafeRead(buf, kBufSize);
2930 cdInfo.ParseEcd64e(buf);
2931 }
2932
2933 RINOK(Skip64(recordSize - kEcd64_MainSize, items.Size()))
2934 }
2935
2936
2937 ReadSignature();
2938
2939 if (_signature != NSignature::kEcd64Locator)
2940 {
2941 HeadersError = true;
2942 return S_OK;
2943 }
2944
2945 {
2946 const unsigned kBufSize = 16;
2947 Byte buf[kBufSize];
2948 SafeRead(buf, kBufSize);
2949 locator.Parse(buf);
2950 // we ignore the error, where some zip creators use (NumDisks == 0)
2951 // if (locator.NumDisks == 0) HeadersWarning = true;
2952 }
2953
2954 ReadSignature();
2955 }
2956
2957
2958 if (_signature != NSignature::kEcd)
2959 {
2960 HeadersError = true;
2961 return S_OK;
2962 }
2963
2964
2965 CanStartNewVol = false;
2966
2967 // ---------- ECD ----------
2968
2969 CEcd ecd;
2970 {
2971 const unsigned kBufSize = kEcdSize - 4;
2972 Byte buf[kBufSize];
2973 SafeRead(buf, kBufSize);
2974 ecd.Parse(buf);
2975 }
2976
2977 COPY_ECD_ITEM_16(ThisDisk)
2978 COPY_ECD_ITEM_16(CdDisk)
2979 COPY_ECD_ITEM_16(NumEntries_in_ThisDisk)
2980 COPY_ECD_ITEM_16(NumEntries)
2981 COPY_ECD_ITEM_32(Size)
2982 COPY_ECD_ITEM_32(Offset)
2983
2984 bool cdOK = true;
2985
2986 if ((UInt32)cdInfo.Size != (UInt32)cdSize)
2987 {
2988 // return S_FALSE;
2989 cdOK = false;
2990 }
2991
2992 if (isZip64)
2993 {
2994 if (cdInfo.NumEntries != numCdItems
2995 || cdInfo.Size != cdSize)
2996 {
2997 cdOK = false;
2998 }
2999 }
3000
3001
3002 if (IsMultiVol)
3003 {
3004 if (cdDisk != cdInfo.CdDisk)
3005 HeadersError = true;
3006 }
3007 else if (needSetBase && cdOK)
3008 {
3009 const UInt64 oldBase = (UInt64)ArcInfo.Base;
3010 // localsWereRead == true
3011 // ArcInfo.Base == ArcInfo.MarkerPos2
3012 // cdRelatOffset == (cdAbsOffset - ArcInfo.Base)
3013
3014 if (isZip64)
3015 {
3016 if (ecd64Disk == Vols.StartVolIndex)
3017 {
3018 const Int64 newBase = (Int64)ecd64AbsOffset - (Int64)locator.Ecd64Offset;
3019 if (newBase <= (Int64)ecd64AbsOffset)
3020 {
3021 if (!localsWereRead || newBase <= (Int64)ArcInfo.MarkerPos2)
3022 {
3023 ArcInfo.Base = newBase;
3024 cdRelatOffset = (UInt64)((Int64)cdAbsOffset - newBase);
3025 }
3026 else
3027 cdOK = false;
3028 }
3029 }
3030 }
3031 else if (numCdItems != 0) // we can't use ecd.Offset in empty archive?
3032 {
3033 if ((int)cdDisk == Vols.StartVolIndex)
3034 {
3035 const Int64 newBase = (Int64)cdAbsOffset - (Int64)cdInfo.Offset;
3036 if (newBase <= (Int64)cdAbsOffset)
3037 {
3038 if (!localsWereRead || newBase <= (Int64)ArcInfo.MarkerPos2)
3039 {
3040 // cd can be more accurate, when it points before Locals
3041 // so we change Base and cdRelatOffset
3042 ArcInfo.Base = newBase;
3043 cdRelatOffset = cdInfo.Offset;
3044 }
3045 else
3046 {
3047 // const UInt64 delta = ((UInt64)cdRelatOffset - cdInfo.Offset);
3048 const UInt64 delta = ((UInt64)(newBase - ArcInfo.Base));
3049 if ((UInt32)delta == 0)
3050 {
3051 // we set Overflow32bit mode, only if there is (x<<32) offset
3052 // between real_CD_offset_from_MarkerPos and CD_Offset_in_ECD.
3053 // Base and cdRelatOffset unchanged
3054 Overflow32bit = true;
3055 }
3056 else
3057 cdOK = false;
3058 }
3059 }
3060 else
3061 cdOK = false;
3062 }
3063 }
3064 // cdRelatOffset = cdAbsOffset - ArcInfo.Base;
3065
3066 if (localsWereRead)
3067 {
3068 const UInt64 delta = (UInt64)((Int64)oldBase - ArcInfo.Base);
3069 if (delta != 0)
3070 {
3071 FOR_VECTOR (i, items)
3072 items[i].LocalHeaderPos += delta;
3073 }
3074 }
3075 }
3076
3077 if (!cdOK)
3078 HeadersError = true;
3079
3080 EcdVolIndex = cdInfo.ThisDisk;
3081
3082 if (!IsMultiVol)
3083 {
3084 if (EcdVolIndex == 0 && Vols.MissingZip && Vols.StartIsExe)
3085 {
3086 Vols.MissingName.Empty();
3087 Vols.MissingZip = false;
3088 }
3089
3090 if (localsWereRead)
3091 {
3092 if (EcdVolIndex != 0)
3093 {
3094 FOR_VECTOR (i, items)
3095 items[i].Disk = EcdVolIndex;
3096 }
3097 }
3098
3099 UseDisk_in_SingleVol = true;
3100 }
3101
3102 if (isZip64)
3103 {
3104 if ((cdInfo.ThisDisk == 0 && ecd64AbsOffset != (UInt64)(ArcInfo.Base + (Int64)locator.Ecd64Offset))
3105 // || cdInfo.NumEntries_in_ThisDisk != numCdItems
3106 || cdInfo.NumEntries != numCdItems
3107 || cdInfo.Size != cdSize
3108 || (cdInfo.Offset != cdRelatOffset && !items.IsEmpty()))
3109 {
3110 HeadersError = true;
3111 return S_OK;
3112 }
3113 }
3114
3115 if (cdOK && !cdItems.IsEmpty())
3116 {
3117 // ---------- merge Central Directory Items ----------
3118
3119 CRecordVector<unsigned> items2;
3120
3121 int nextLocalIndex = 0;
3122
3123 LocalsCenterMerged = true;
3124
3125 FOR_VECTOR (i, cdItems)
3126 {
3127 if (Callback)
3128 if ((i & 0x3FFF) == 0)
3129 {
3130 const UInt64 numFiles64 = items.Size() + items2.Size();
3131 RINOK(Callback->SetCompleted(&numFiles64, &_cnt))
3132 }
3133
3134 const CItemEx &cdItem = cdItems[i];
3135
3136 int index = -1;
3137
3138 if (nextLocalIndex != -1)
3139 {
3140 if ((unsigned)nextLocalIndex < items.Size())
3141 {
3142 CItemEx &item = items[(unsigned)nextLocalIndex];
3143 if (item.Disk == cdItem.Disk &&
3144 (item.LocalHeaderPos == cdItem.LocalHeaderPos
3145 || (Overflow32bit && (UInt32)item.LocalHeaderPos == cdItem.LocalHeaderPos)))
3146 index = nextLocalIndex++;
3147 else
3148 nextLocalIndex = -1;
3149 }
3150 }
3151
3152 if (index == -1)
3153 index = FindItem(items, cdItem);
3154
3155 // index = -1;
3156
3157 if (index == -1)
3158 {
3159 items2.Add(i);
3160 HeadersError = true;
3161 continue;
3162 }
3163
3164 CItemEx &item = items[(unsigned)index];
3165 if (item.Name != cdItem.Name
3166 // || item.Name.Len() != cdItem.Name.Len()
3167 || item.PackSize != cdItem.PackSize
3168 || item.Size != cdItem.Size
3169 // item.ExtractVersion != cdItem.ExtractVersion
3170 || !FlagsAreSame(item, cdItem)
3171 || item.Crc != cdItem.Crc)
3172 {
3173 HeadersError = true;
3174 continue;
3175 }
3176
3177 // item.Name = cdItem.Name;
3178 item.MadeByVersion = cdItem.MadeByVersion;
3179 item.CentralExtra = cdItem.CentralExtra;
3180 item.InternalAttrib = cdItem.InternalAttrib;
3181 item.ExternalAttrib = cdItem.ExternalAttrib;
3182 item.Comment = cdItem.Comment;
3183 item.FromCentral = cdItem.FromCentral;
3184 // 22.02: we force utf8 flag, if central header has utf8 flag
3185 if (cdItem.Flags & NFileHeader::NFlags::kUtf8)
3186 item.Flags |= NFileHeader::NFlags::kUtf8;
3187 }
3188
3189 FOR_VECTOR (k, items2)
3190 items.Add(cdItems[items2[k]]);
3191 }
3192
3193 if (ecd.NumEntries < ecd.NumEntries_in_ThisDisk)
3194 HeadersError = true;
3195
3196 if (ecd.ThisDisk == 0)
3197 {
3198 // if (isZip64)
3199 {
3200 if (ecd.NumEntries != ecd.NumEntries_in_ThisDisk)
3201 HeadersError = true;
3202 }
3203 }
3204
3205 if (isZip64)
3206 {
3207 if (cdInfo.NumEntries != items.Size()
3208 || (ecd.NumEntries != items.Size() && ecd.NumEntries != 0xFFFF))
3209 HeadersError = true;
3210 }
3211 else
3212 {
3213 // old 7-zip could store 32-bit number of CD items to 16-bit field.
3214 // if (ecd.NumEntries != items.Size())
3215 if (ecd.NumEntries > items.Size())
3216 HeadersError = true;
3217
3218 if (cdInfo.NumEntries != numCdItems)
3219 {
3220 if ((UInt16)cdInfo.NumEntries != (UInt16)numCdItems)
3221 HeadersError = true;
3222 else
3223 Cd_NumEntries_Overflow_16bit = true;
3224 }
3225 }
3226
3227 ReadBuffer(ArcInfo.Comment, ecd.CommentSize);
3228
3229 _inBufMode = false;
3230
3231 // DisableBufMode();
3232 // Buffer.Free();
3233 /* we can't clear buf varibles. we need them to calculate PhySize of archive */
3234
3235 if ((UInt16)cdInfo.NumEntries != (UInt16)numCdItems
3236 || (UInt32)cdInfo.Size != (UInt32)cdSize
3237 || ((UInt32)cdInfo.Offset != (UInt32)cdRelatOffset && !items.IsEmpty()))
3238 {
3239 // return S_FALSE;
3240 HeadersError = true;
3241 }
3242
3243 #ifdef ZIP_SELF_CHECK
3244 if (localsWereRead)
3245 {
3246 const UInt64 endPos = ArcInfo.MarkerPos2 + _cnt;
3247 if (endPos != (IsMultiVol ? Vols.TotalBytesSize : ArcInfo.FileEndPos))
3248 {
3249 // there are some data after the end of archive or error in code;
3250 return E_FAIL;
3251 }
3252 }
3253 #endif
3254
3255 // printf("\nOpen OK");
3256 return S_OK;
3257 }
3258
3259
3260
Open(IInStream * stream,const UInt64 * searchLimit,IArchiveOpenCallback * callback,CObjectVector<CItemEx> & items)3261 HRESULT CInArchive::Open(IInStream *stream, const UInt64 *searchLimit,
3262 IArchiveOpenCallback *callback, CObjectVector<CItemEx> &items)
3263 {
3264 items.Clear();
3265
3266 Close();
3267
3268 UInt64 startPos;
3269 RINOK(InStream_GetPos(stream, startPos))
3270 RINOK(InStream_GetSize_SeekToEnd(stream, ArcInfo.FileEndPos))
3271 _streamPos = ArcInfo.FileEndPos;
3272
3273 StartStream = stream;
3274 Stream = stream;
3275 Callback = callback;
3276
3277 DisableBufMode();
3278
3279 bool volWasRequested = false;
3280
3281 if (!Disable_VolsRead)
3282 if (callback
3283 && (startPos == 0 || !searchLimit || *searchLimit != 0))
3284 {
3285 // we try to read volumes only if it's first call (offset == 0) or scan is allowed.
3286 volWasRequested = true;
3287 RINOK(ReadVols())
3288 }
3289
3290 if (Disable_FindMarker)
3291 {
3292 RINOK(SeekToVol(-1, startPos))
3293 StreamRef = stream;
3294 Stream = stream;
3295 MarkerIsFound = true;
3296 MarkerIsSafe = true;
3297 ArcInfo.MarkerPos = startPos;
3298 ArcInfo.MarkerPos2 = startPos;
3299 }
3300 else
3301 if (IsMultiVol && Vols.StartParsingVol == 0 && (unsigned)Vols.StartParsingVol < Vols.Streams.Size())
3302 {
3303 // only StartParsingVol = 0 is safe search.
3304 RINOK(SeekToVol(0, 0))
3305 // if (Stream)
3306 {
3307 // UInt64 limit = 1 << 22; // for sfx
3308 UInt64 limit = 0; // without sfx
3309
3310 HRESULT res = FindMarker(&limit);
3311
3312 if (res == S_OK)
3313 {
3314 MarkerIsFound = true;
3315 MarkerIsSafe = true;
3316 }
3317 else if (res != S_FALSE)
3318 return res;
3319 }
3320 }
3321 else
3322 {
3323 // printf("\nOpen offset = %u\n", (unsigned)startPos);
3324 if (IsMultiVol
3325 && (unsigned)Vols.StartParsingVol < Vols.Streams.Size()
3326 && Vols.Streams[(unsigned)Vols.StartParsingVol].Stream)
3327 {
3328 RINOK(SeekToVol(Vols.StartParsingVol, Vols.StreamIndex == Vols.StartVolIndex ? startPos : 0))
3329 }
3330 else
3331 {
3332 RINOK(SeekToVol(-1, startPos))
3333 }
3334
3335 // UInt64 limit = 1 << 22;
3336 // HRESULT res = FindMarker(&limit);
3337
3338 HRESULT res = FindMarker(searchLimit);
3339
3340 // const UInt64 curPos = GetVirtStreamPos();
3341 const UInt64 curPos = ArcInfo.MarkerPos2 + 4;
3342
3343 if (res == S_OK)
3344 MarkerIsFound = true;
3345 else if (!IsMultiVol)
3346 {
3347 /*
3348 // if (startPos != 0), probably CD could be already tested with another call with (startPos == 0).
3349 // so we don't want to try to open CD again in that case.
3350 if (startPos != 0)
3351 return res;
3352 // we can try to open CD, if there is no Marker and (startPos == 0).
3353 // is it OK to open such files as ZIP, or big number of false positive, when CD can be find in end of file ?
3354 */
3355 return res;
3356 }
3357
3358 if (ArcInfo.IsSpanMode && !volWasRequested)
3359 {
3360 RINOK(ReadVols())
3361 if (IsMultiVol && MarkerIsFound && ArcInfo.MarkerVolIndex < 0)
3362 ArcInfo.MarkerVolIndex = Vols.StartVolIndex;
3363 }
3364
3365 MarkerIsSafe = !IsMultiVol
3366 || (ArcInfo.MarkerVolIndex == 0 && ArcInfo.MarkerPos == 0)
3367 ;
3368
3369
3370 if (IsMultiVol)
3371 {
3372 if ((unsigned)Vols.StartVolIndex < Vols.Streams.Size())
3373 {
3374 Stream = Vols.Streams[(unsigned)Vols.StartVolIndex].Stream;
3375 if (Stream)
3376 {
3377 RINOK(Seek_SavePos(curPos))
3378 }
3379 else
3380 IsMultiVol = false;
3381 }
3382 else
3383 IsMultiVol = false;
3384 }
3385
3386 if (!IsMultiVol)
3387 {
3388 if (Vols.StreamIndex != -1)
3389 {
3390 Stream = StartStream;
3391 Vols.StreamIndex = -1;
3392 InitBuf();
3393 RINOK(Seek_SavePos(curPos))
3394 }
3395
3396 ArcInfo.MarkerVolIndex = -1;
3397 StreamRef = stream;
3398 Stream = stream;
3399 }
3400 }
3401
3402
3403 if (!IsMultiVol)
3404 Vols.ClearRefs();
3405
3406 {
3407 HRESULT res;
3408 try
3409 {
3410 res = ReadHeaders(items);
3411 }
3412 catch (const CSystemException &e) { res = e.ErrorCode; }
3413 catch (const CUnexpectEnd &)
3414 {
3415 if (items.IsEmpty())
3416 return S_FALSE;
3417 UnexpectedEnd = true;
3418 res = S_OK;
3419 }
3420 catch (...)
3421 {
3422 DisableBufMode();
3423 throw;
3424 }
3425
3426 if (IsMultiVol)
3427 {
3428 ArcInfo.FinishPos = ArcInfo.FileEndPos;
3429 if ((unsigned)Vols.StreamIndex < Vols.Streams.Size())
3430 if (GetVirtStreamPos() < Vols.Streams[(unsigned)Vols.StreamIndex].Size)
3431 ArcInfo.ThereIsTail = true;
3432 }
3433 else
3434 {
3435 ArcInfo.FinishPos = GetVirtStreamPos();
3436 ArcInfo.ThereIsTail = (ArcInfo.FileEndPos > ArcInfo.FinishPos);
3437 }
3438
3439 DisableBufMode();
3440
3441 IsArcOpen = true;
3442 if (!IsMultiVol)
3443 Vols.Streams.Clear();
3444 return res;
3445 }
3446 }
3447
3448
GetItemStream(const CItemEx & item,bool seekPackData,CMyComPtr<ISequentialInStream> & stream)3449 HRESULT CInArchive::GetItemStream(const CItemEx &item, bool seekPackData, CMyComPtr<ISequentialInStream> &stream)
3450 {
3451 stream.Release();
3452
3453 UInt64 pos = item.LocalHeaderPos;
3454 if (seekPackData)
3455 pos += item.LocalFullHeaderSize;
3456
3457 if (!IsMultiVol)
3458 {
3459 if (UseDisk_in_SingleVol && item.Disk != EcdVolIndex)
3460 return S_OK;
3461 pos = (UInt64)((Int64)pos + ArcInfo.Base);
3462 RINOK(InStream_SeekSet(StreamRef, pos))
3463 stream = StreamRef;
3464 return S_OK;
3465 }
3466
3467 if (item.Disk >= Vols.Streams.Size())
3468 return S_OK;
3469
3470 IInStream *str2 = Vols.Streams[item.Disk].Stream;
3471 if (!str2)
3472 return S_OK;
3473 RINOK(InStream_SeekSet(str2, pos))
3474
3475 Vols.NeedSeek = false;
3476 Vols.StreamIndex = (int)item.Disk;
3477
3478 CVolStream *volsStreamSpec = new CVolStream;
3479 volsStreamSpec->Vols = &Vols;
3480 stream = volsStreamSpec;
3481
3482 return S_OK;
3483 }
3484
3485 }}
3486