xref: /aosp_15_r20/external/lzma/CPP/7zip/Archive/Zip/ZipIn.cpp (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 // Archive/ZipIn.cpp
2 
3 #include "StdAfx.h"
4 
5 // #include <stdio.h>
6 
7 #include "../../../Common/DynamicBuffer.h"
8 #include "../../../Common/IntToString.h"
9 #include "../../../Common/MyException.h"
10 #include "../../../Common/StringToInt.h"
11 
12 #include "../../../Windows/PropVariant.h"
13 
14 #include "../IArchive.h"
15 
16 #include "ZipIn.h"
17 
18 #define Get16(p) GetUi16(p)
19 #define Get32(p) GetUi32(p)
20 #define Get64(p) GetUi64(p)
21 
22 #define G16(offs, v) v = Get16(p + (offs))
23 #define G32(offs, v) v = Get32(p + (offs))
24 #define G64(offs, v) v = Get64(p + (offs))
25 
26 namespace NArchive {
27 namespace NZip {
28 
29 /* We try to use the same Buffer size (1 << 17) for all tasks.
30    This allows us to avoid reallocations and cache clearing. */
31 
32 static const size_t kSeqBufferSize = (size_t)1 << 17;
33 
34 /*
35 Open()
36 {
37   _inBufMode = false;
38   ReadVols()
39     FindCd();
40       TryEcd64()
41   SeekToVol()
42   FindMarker()
43     _inBufMode = true;
44   ReadHeaders()
45     _inBufMode = false;
46     ReadCd()
47       FindCd()
48         TryEcd64()
49       TryReadCd()
50       {
51         SeekToVol();
52         _inBufMode = true;
53       }
54     _inBufMode = true;
55     ReadLocals()
56     ReadCdItem()
57     ....
58 }
59 FindCd() writes to Buffer without touching (_inBufMode)
60 */
61 
62 /*
63   if (not defined ZIP_SELF_CHECK) : it reads CD and if error in first pass CD reading, it reads LOCALS-CD-MODE
64   if (    defined ZIP_SELF_CHECK) : it always reads CD and LOCALS-CD-MODE
65   use ZIP_SELF_CHECK to check LOCALS-CD-MODE for any zip archive
66 */
67 
68 // #define ZIP_SELF_CHECK
69 
70 
71 struct CEcd
72 {
73   UInt16 ThisDisk;
74   UInt16 CdDisk;
75   UInt16 NumEntries_in_ThisDisk;
76   UInt16 NumEntries;
77   UInt32 Size;
78   UInt32 Offset;
79   UInt16 CommentSize;
80 
IsEmptyArcNArchive::NZip::CEcd81   bool IsEmptyArc() const
82   {
83     return ThisDisk == 0
84         && CdDisk == 0
85         && NumEntries_in_ThisDisk == 0
86         && NumEntries == 0
87         && Size == 0
88         && Offset == 0 // test it
89     ;
90   }
91 
92   void Parse(const Byte *p); // (p) doesn't include signature
93 };
94 
Parse(const Byte * p)95 void CEcd::Parse(const Byte *p)
96 {
97   // (p) doesn't include signature
98   G16(0, ThisDisk);
99   G16(2, CdDisk);
100   G16(4, NumEntries_in_ThisDisk);
101   G16(6, NumEntries);
102   G32(8, Size);
103   G32(12, Offset);
104   G16(16, CommentSize);
105 }
106 
107 
ParseEcd32(const Byte * p)108 void CCdInfo::ParseEcd32(const Byte *p)
109 {
110   IsFromEcd64 = false;
111   // (p) includes signature
112   p += 4;
113   G16(0, ThisDisk);
114   G16(2, CdDisk);
115   G16(4, NumEntries_in_ThisDisk);
116   G16(6, NumEntries);
117   G32(8, Size);
118   G32(12, Offset);
119   G16(16, CommentSize);
120 }
121 
ParseEcd64e(const Byte * p)122 void CCdInfo::ParseEcd64e(const Byte *p)
123 {
124   IsFromEcd64 = true;
125   // (p) exclude signature
126   G16(0, VersionMade);
127   G16(2, VersionNeedExtract);
128   G32(4, ThisDisk);
129   G32(8, CdDisk);
130 
131   G64(12, NumEntries_in_ThisDisk);
132   G64(20, NumEntries);
133   G64(28, Size);
134   G64(36, Offset);
135 }
136 
137 
138 struct CLocator
139 {
140   UInt32 Ecd64Disk;
141   UInt32 NumDisks;
142   UInt64 Ecd64Offset;
143 
CLocatorNArchive::NZip::CLocator144   CLocator(): Ecd64Disk(0), NumDisks(0), Ecd64Offset(0) {}
145 
ParseNArchive::NZip::CLocator146   void Parse(const Byte *p)
147   {
148     G32(0, Ecd64Disk);
149     G64(4, Ecd64Offset);
150     G32(12, NumDisks);
151   }
152 
IsEmptyArcNArchive::NZip::CLocator153   bool IsEmptyArc() const
154   {
155     return Ecd64Disk == 0 && NumDisks == 0 && Ecd64Offset == 0;
156   }
157 };
158 
159 
160 
161 
ClearRefs()162 void CInArchive::ClearRefs()
163 {
164   StreamRef.Release();
165   Stream = NULL;
166   StartStream = NULL;
167   Callback = NULL;
168 
169   Vols.Clear();
170 }
171 
Close()172 void CInArchive::Close()
173 {
174   _cnt = 0;
175   DisableBufMode();
176 
177   IsArcOpen = false;
178 
179   IsArc = false;
180   IsZip64 = false;
181 
182   IsApk = false;
183   IsCdUnsorted = false;
184 
185   HeadersError = false;
186   HeadersWarning = false;
187   ExtraMinorError = false;
188 
189   UnexpectedEnd = false;
190   LocalsWereRead = false;
191   LocalsCenterMerged = false;
192   NoCentralDir = false;
193   Overflow32bit = false;
194   Cd_NumEntries_Overflow_16bit = false;
195 
196   MarkerIsFound = false;
197   MarkerIsSafe = false;
198 
199   IsMultiVol = false;
200   UseDisk_in_SingleVol = false;
201   EcdVolIndex = 0;
202 
203   ArcInfo.Clear();
204 
205   ClearRefs();
206 }
207 
208 
209 
Seek_SavePos(UInt64 offset)210 HRESULT CInArchive::Seek_SavePos(UInt64 offset)
211 {
212   // InitBuf();
213   // if (!Stream) return S_FALSE;
214   return Stream->Seek((Int64)offset, STREAM_SEEK_SET, &_streamPos);
215 }
216 
217 
218 /* SeekToVol() will keep the cached mode, if new volIndex is
219    same Vols.StreamIndex volume, and offset doesn't go out of cached region */
220 
SeekToVol(int volIndex,UInt64 offset)221 HRESULT CInArchive::SeekToVol(int volIndex, UInt64 offset)
222 {
223   if (volIndex != Vols.StreamIndex)
224   {
225     if (IsMultiVol && volIndex >= 0)
226     {
227       if ((unsigned)volIndex >= Vols.Streams.Size())
228         return S_FALSE;
229       if (!Vols.Streams[(unsigned)volIndex].Stream)
230         return S_FALSE;
231       Stream = Vols.Streams[(unsigned)volIndex].Stream;
232     }
233     else if (volIndex == -2)
234     {
235       if (!Vols.ZipStream)
236         return S_FALSE;
237       Stream = Vols.ZipStream;
238     }
239     else
240       Stream = StartStream;
241     Vols.StreamIndex = volIndex;
242   }
243   else
244   {
245     if (offset <= _streamPos)
246     {
247       const UInt64 back = _streamPos - offset;
248       if (back <= _bufCached)
249       {
250         _bufPos = _bufCached - (size_t)back;
251         return S_OK;
252       }
253     }
254   }
255   InitBuf();
256   return Seek_SavePos(offset);
257 }
258 
259 
AllocateBuffer(size_t size)260 HRESULT CInArchive::AllocateBuffer(size_t size)
261 {
262   if (size <= Buffer.Size())
263     return S_OK;
264   /* in cached mode virtual_pos is not equal to phy_pos (_streamPos)
265      so we change _streamPos and do Seek() to virtual_pos before cache clearing */
266   if (_bufPos != _bufCached)
267   {
268     RINOK(Seek_SavePos(GetVirtStreamPos()))
269   }
270   InitBuf();
271   Buffer.AllocAtLeast(size);
272   if (!Buffer.IsAllocated())
273     return E_OUTOFMEMORY;
274   return S_OK;
275 }
276 
277 // ---------- ReadFromCache ----------
278 // reads from cache and from Stream
279 // move to next volume can be allowed if (CanStartNewVol) and only before first byte reading
280 
ReadFromCache(Byte * data,unsigned size,unsigned & processed)281 HRESULT CInArchive::ReadFromCache(Byte *data, unsigned size, unsigned &processed)
282 {
283   HRESULT result = S_OK;
284   processed = 0;
285 
286   for (;;)
287   {
288     if (size == 0)
289       return S_OK;
290 
291     const size_t avail = GetAvail();
292 
293     if (avail != 0)
294     {
295       unsigned cur = size;
296       if (cur > avail)
297         cur = (unsigned)avail;
298       memcpy(data, (const Byte *)Buffer + _bufPos, cur);
299 
300       data += cur;
301       size -= cur;
302       processed += cur;
303 
304       _bufPos += cur;
305       _cnt += cur;
306 
307       CanStartNewVol = false;
308 
309       continue;
310     }
311 
312     InitBuf();
313 
314     if (_inBufMode)
315     {
316       UInt32 cur = 0;
317       result = Stream->Read(Buffer, (UInt32)Buffer.Size(), &cur);
318       _bufPos = 0;
319       _bufCached = cur;
320       _streamPos += cur;
321       if (cur != 0)
322         CanStartNewVol = false;
323       if (result != S_OK)
324         break;
325       if (cur != 0)
326         continue;
327     }
328     else
329     {
330       size_t cur = size;
331       result = ReadStream(Stream, data, &cur);
332       data += cur;
333       size -= (unsigned)cur;
334       processed += (unsigned)cur;
335       _streamPos += cur;
336       _cnt += cur;
337       if (cur != 0)
338       {
339         CanStartNewVol = false;
340         break;
341       }
342       if (result != S_OK)
343         break;
344     }
345 
346     if (   !IsMultiVol
347         || !CanStartNewVol
348         || Vols.StreamIndex < 0
349         || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size())
350       break;
351 
352     const CVols::CSubStreamInfo &s = Vols.Streams[(unsigned)Vols.StreamIndex + 1];
353     if (!s.Stream)
354       break;
355     result = s.SeekToStart();
356     if (result != S_OK)
357       break;
358     Vols.StreamIndex++;
359     _streamPos = 0;
360     // Vols.NeedSeek = false;
361 
362     Stream = s.Stream;
363   }
364 
365   return result;
366 }
367 
368 
ReadFromCache_FALSE(Byte * data,unsigned size)369 HRESULT CInArchive::ReadFromCache_FALSE(Byte *data, unsigned size)
370 {
371   unsigned processed;
372   HRESULT res = ReadFromCache(data, size, processed);
373   if (res == S_OK && size != processed)
374     return S_FALSE;
375   return res;
376 }
377 
378 
CheckDosTime(UInt32 dosTime)379 static bool CheckDosTime(UInt32 dosTime)
380 {
381   if (dosTime == 0)
382     return true;
383   unsigned month = (dosTime >> 21) & 0xF;
384   unsigned day = (dosTime >> 16) & 0x1F;
385   unsigned hour = (dosTime >> 11) & 0x1F;
386   unsigned min = (dosTime >> 5) & 0x3F;
387   unsigned sec = (dosTime & 0x1F) * 2;
388   if (month < 1 || month > 12 || day < 1 || day > 31 || hour > 23 || min > 59 || sec > 59)
389     return false;
390   return true;
391 }
392 
IsArc_Zip(const Byte * p,size_t size)393 API_FUNC_IsArc IsArc_Zip(const Byte *p, size_t size)
394 {
395   if (size < 8)
396     return k_IsArc_Res_NEED_MORE;
397   if (p[0] != 'P')
398     return k_IsArc_Res_NO;
399 
400   UInt32 sig = Get32(p);
401 
402   if (sig == NSignature::kNoSpan || sig == NSignature::kSpan)
403   {
404     p += 4;
405     size -= 4;
406   }
407 
408   sig = Get32(p);
409 
410   if (sig == NSignature::kEcd64)
411   {
412     if (size < kEcd64_FullSize)
413       return k_IsArc_Res_NEED_MORE;
414 
415     const UInt64 recordSize = Get64(p + 4);
416     if (   recordSize < kEcd64_MainSize
417         || recordSize > kEcd64_MainSize + (1 << 20))
418       return k_IsArc_Res_NO;
419     CCdInfo cdInfo;
420     cdInfo.ParseEcd64e(p + 12);
421     if (!cdInfo.IsEmptyArc())
422       return k_IsArc_Res_NO;
423     return k_IsArc_Res_YES; // k_IsArc_Res_YES_2;
424   }
425 
426   if (sig == NSignature::kEcd)
427   {
428     if (size < kEcdSize)
429       return k_IsArc_Res_NEED_MORE;
430     CEcd ecd;
431     ecd.Parse(p + 4);
432     // if (ecd.cdSize != 0)
433     if (!ecd.IsEmptyArc())
434       return k_IsArc_Res_NO;
435     return k_IsArc_Res_YES; // k_IsArc_Res_YES_2;
436   }
437 
438   if (sig != NSignature::kLocalFileHeader)
439     return k_IsArc_Res_NO;
440 
441   if (size < kLocalHeaderSize)
442     return k_IsArc_Res_NEED_MORE;
443 
444   p += 4;
445 
446   {
447     const unsigned kPureHeaderSize = kLocalHeaderSize - 4;
448     unsigned i;
449     for (i = 0; i < kPureHeaderSize && p[i] == 0; i++);
450     if (i == kPureHeaderSize)
451       return k_IsArc_Res_NEED_MORE;
452   }
453 
454   /*
455   if (p[0] >= 128) // ExtractVersion.Version;
456     return k_IsArc_Res_NO;
457   */
458 
459   // ExtractVersion.Version = p[0];
460   // ExtractVersion.HostOS = p[1];
461   // Flags = Get16(p + 2);
462   // Method = Get16(p + 4);
463   /*
464   // 9.33: some zip archives contain incorrect value in timestamp. So we don't check it now
465   UInt32 dosTime = Get32(p + 6);
466   if (!CheckDosTime(dosTime))
467     return k_IsArc_Res_NO;
468   */
469   // Crc = Get32(p + 10);
470   // PackSize = Get32(p + 14);
471   // Size = Get32(p + 18);
472   const unsigned nameSize = Get16(p + 22);
473   unsigned extraSize = Get16(p + 24);
474   const UInt32 extraOffset = kLocalHeaderSize + (UInt32)nameSize;
475 
476   /*
477   // 21.02: fixed. we don't use the following check
478   if (extraOffset + extraSize > (1 << 16))
479     return k_IsArc_Res_NO;
480   */
481 
482   p -= 4;
483 
484   {
485     size_t rem = size - kLocalHeaderSize;
486     if (rem > nameSize)
487       rem = nameSize;
488     const Byte *p2 = p + kLocalHeaderSize;
489     for (size_t i = 0; i < rem; i++)
490       if (p2[i] == 0)
491       {
492         // we support some "bad" zip archives that contain zeros after name
493         for (size_t k = i + 1; k < rem; k++)
494           if (p2[k] != 0)
495             return k_IsArc_Res_NO;
496         break;
497         /*
498         if (i != nameSize - 1)
499           return k_IsArc_Res_NO;
500         */
501       }
502   }
503 
504   if (size < extraOffset)
505     return k_IsArc_Res_NEED_MORE;
506 
507   if (extraSize > 0)
508   {
509     p += extraOffset;
510     size -= extraOffset;
511     while (extraSize != 0)
512     {
513       if (extraSize < 4)
514       {
515         // 7-Zip before 9.31 created incorrect WzAES Extra in folder's local headers.
516         // so we return k_IsArc_Res_YES to support such archives.
517         // return k_IsArc_Res_NO; // do we need to support such extra ?
518         return k_IsArc_Res_YES;
519       }
520       if (size < 4)
521         return k_IsArc_Res_NEED_MORE;
522       unsigned dataSize = Get16(p + 2);
523       size -= 4;
524       extraSize -= 4;
525       p += 4;
526       if (dataSize > extraSize)
527       {
528         // It can be error on header.
529         // We want to support such rare case bad archives.
530         // We use additional checks to reduce false-positive probability.
531         if (nameSize == 0
532             || nameSize > (1 << 9)
533             || extraSize > (1 << 9))
534           return k_IsArc_Res_NO;
535         return k_IsArc_Res_YES;
536       }
537       if (dataSize > size)
538         return k_IsArc_Res_NEED_MORE;
539       size -= dataSize;
540       extraSize -= dataSize;
541       p += dataSize;
542     }
543   }
544 
545   return k_IsArc_Res_YES;
546 }
547 
IsArc_Zip_2(const Byte * p,size_t size,bool isFinal)548 static UInt32 IsArc_Zip_2(const Byte *p, size_t size, bool isFinal)
549 {
550   UInt32 res = IsArc_Zip(p, size);
551   if (res == k_IsArc_Res_NEED_MORE && isFinal)
552     return k_IsArc_Res_NO;
553   return res;
554 }
555 
556 
557 
558 /* FindPK_4() is allowed to access data up to and including &limit[3].
559    limit[4] access is not allowed.
560   return:
561     (return_ptr <  limit) : "PK" was found at (return_ptr)
562     (return_ptr >= limit) : limit was reached or crossed. So no "PK" found before limit
563 */
564 Z7_NO_INLINE
FindPK_4(const Byte * p,const Byte * limit)565 static const Byte *FindPK_4(const Byte *p, const Byte *limit)
566 {
567   for (;;)
568   {
569     for (;;)
570     {
571       if (p >= limit)
572         return limit;
573       Byte b = p[1];
574       if (b == 0x4B) { if (p[0] == 0x50) { return p;     } p += 1; break; }
575       if (b == 0x50) { if (p[2] == 0x4B) { return p + 1; } p += 2; break; }
576       b = p[3];
577       p += 4;
578       if (b == 0x4B) { if (p[-2]== 0x50) { return p - 2; } p -= 1; break; }
579       if (b == 0x50) { if (p[0] == 0x4B) { return p - 1; }         break; }
580     }
581   }
582   /*
583   for (;;)
584   {
585     for (;;)
586     {
587       if (p >= limit)
588         return limit;
589       if (*p++ == 0x50) break;
590       if (*p++ == 0x50) break;
591       if (*p++ == 0x50) break;
592       if (*p++ == 0x50) break;
593     }
594     if (*p == 0x4B)
595       return p - 1;
596   }
597   */
598 }
599 
600 
601 /*
602 ---------- FindMarker ----------
603 returns:
604   S_OK:
605     ArcInfo.MarkerVolIndex : volume of marker
606     ArcInfo.MarkerPos   : Pos of first signature
607     ArcInfo.MarkerPos2  : Pos of main signature (local item signature in most cases)
608     _streamPos          : stream pos
609     _cnt                : The number of virtual Bytes after start of search to offset after signature
610     _signature          : main signature
611 
612   S_FALSE: can't find marker, or there is some non-zip data after marker
613 
614   Error code: stream reading error.
615 */
616 
FindMarker(const UInt64 * searchLimit)617 HRESULT CInArchive::FindMarker(const UInt64 *searchLimit)
618 {
619   ArcInfo.MarkerPos = GetVirtStreamPos();
620   ArcInfo.MarkerPos2 = ArcInfo.MarkerPos;
621   ArcInfo.MarkerVolIndex = Vols.StreamIndex;
622 
623   _cnt = 0;
624 
625   CanStartNewVol = false;
626 
627   if (searchLimit && *searchLimit == 0)
628   {
629     Byte startBuf[kMarkerSize];
630     RINOK(ReadFromCache_FALSE(startBuf, kMarkerSize))
631 
632     UInt32 marker = Get32(startBuf);
633     _signature = marker;
634 
635     if (   marker == NSignature::kNoSpan
636         || marker == NSignature::kSpan)
637     {
638       RINOK(ReadFromCache_FALSE(startBuf, kMarkerSize))
639       _signature = Get32(startBuf);
640     }
641 
642     if (   _signature != NSignature::kEcd
643         && _signature != NSignature::kEcd64
644         && _signature != NSignature::kLocalFileHeader)
645       return S_FALSE;
646 
647     ArcInfo.MarkerPos2 = GetVirtStreamPos() - 4;
648     ArcInfo.IsSpanMode = (marker == NSignature::kSpan);
649 
650     // we use weak test in case of (*searchLimit == 0)
651     // since error will be detected later in Open function
652     return S_OK;
653   }
654 
655   // zip specification: (_zip_header_size < (1 << 16))
656   // so we need such size to check header
657   const size_t kCheckSize = (size_t)1 << 16;
658   const size_t kBufSize   = (size_t)1 << 17; // (kBufSize must be > kCheckSize)
659 
660   RINOK(AllocateBuffer(kBufSize))
661 
662   _inBufMode = true;
663 
664   UInt64 progressPrev = 0;
665 
666   for (;;)
667   {
668     RINOK(LookAhead(kBufSize))
669 
670     const size_t avail = GetAvail();
671 
672     size_t limitPos;
673     // (avail > kBufSize) is possible, if (Buffer.Size() > kBufSize)
674     const bool isFinished = (avail < kBufSize);
675     if (isFinished)
676     {
677       const unsigned kMinAllowed = 4;
678       if (avail <= kMinAllowed)
679       {
680         if (   !IsMultiVol
681             || Vols.StreamIndex < 0
682             || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size())
683           break;
684 
685         SkipLookahed(avail);
686 
687         const CVols::CSubStreamInfo &s = Vols.Streams[(unsigned)Vols.StreamIndex + 1];
688         if (!s.Stream)
689           break;
690 
691         RINOK(s.SeekToStart())
692 
693         InitBuf();
694         Vols.StreamIndex++;
695         _streamPos = 0;
696         Stream = s.Stream;
697         continue;
698       }
699       limitPos = avail - kMinAllowed;
700     }
701     else
702       limitPos = (avail - kCheckSize);
703 
704     // we don't check at (limitPos) for good fast aligned operations
705 
706     if (searchLimit)
707     {
708       if (_cnt > *searchLimit)
709         break;
710       UInt64 rem = *searchLimit - _cnt;
711       if (limitPos > rem)
712         limitPos = (size_t)rem + 1;
713     }
714 
715     if (limitPos == 0)
716       break;
717 
718     const Byte * const pStart = Buffer + _bufPos;
719     const Byte * p = pStart;
720     const Byte * const limit = pStart + limitPos;
721 
722     for (;; p++)
723     {
724       p = FindPK_4(p, limit);
725       if (p >= limit)
726         break;
727       size_t rem = (size_t)(pStart + avail - p);
728       /* 22.02 : we limit check size with kCheckSize to be consistent for
729          any different combination of _bufPos in Buffer and size of Buffer. */
730       if (rem > kCheckSize)
731         rem = kCheckSize;
732       const UInt32 res = IsArc_Zip_2(p, rem, isFinished);
733       if (res != k_IsArc_Res_NO)
734       {
735         if (rem < kMarkerSize)
736           return S_FALSE;
737         _signature = Get32(p);
738         SkipLookahed((size_t)(p - pStart));
739         ArcInfo.MarkerVolIndex = Vols.StreamIndex;
740         ArcInfo.MarkerPos = GetVirtStreamPos();
741         ArcInfo.MarkerPos2 = ArcInfo.MarkerPos;
742         SkipLookahed(4);
743         if (   _signature == NSignature::kNoSpan
744             || _signature == NSignature::kSpan)
745         {
746           if (rem < kMarkerSize * 2)
747             return S_FALSE;
748           ArcInfo.IsSpanMode = (_signature == NSignature::kSpan);
749           _signature = Get32(p + 4);
750           ArcInfo.MarkerPos2 += 4;
751           SkipLookahed(4);
752         }
753         return S_OK;
754       }
755     }
756 
757     if (!IsMultiVol && isFinished)
758       break;
759 
760     SkipLookahed((size_t)(p - pStart));
761 
762     if (Callback && (_cnt - progressPrev) >= ((UInt32)1 << 23))
763     {
764       progressPrev = _cnt;
765       // const UInt64 numFiles64 = 0;
766       RINOK(Callback->SetCompleted(NULL, &_cnt))
767     }
768   }
769 
770   return S_FALSE;
771 }
772 
773 
774 /*
775 ---------- IncreaseRealPosition ----------
776 moves virtual offset in virtual stream.
777 changing to new volumes is allowed
778 */
779 
IncreaseRealPosition(UInt64 offset,bool & isFinished)780 HRESULT CInArchive::IncreaseRealPosition(UInt64 offset, bool &isFinished)
781 {
782   isFinished = false;
783 
784   for (;;)
785   {
786     const size_t avail = GetAvail();
787 
788     if (offset <= avail)
789     {
790       _bufPos += (size_t)offset;
791       _cnt += offset;
792       return S_OK;
793     }
794 
795     _cnt += avail;
796     offset -= avail;
797 
798     _bufCached = 0;
799     _bufPos = 0;
800 
801     if (!_inBufMode)
802       break;
803 
804     CanStartNewVol = true;
805     LookAhead(1);
806 
807     if (GetAvail() == 0)
808       return S_OK;
809   }
810 
811   // cache is empty
812 
813   if (!IsMultiVol)
814   {
815     _cnt += offset;
816     return Stream->Seek((Int64)offset, STREAM_SEEK_CUR, &_streamPos);
817   }
818 
819   for (;;)
820   {
821     if (offset == 0)
822       return S_OK;
823 
824     if (Vols.StreamIndex < 0)
825       return S_FALSE;
826     if ((unsigned)Vols.StreamIndex >= Vols.Streams.Size())
827     {
828       isFinished = true;
829       return S_OK;
830     }
831     {
832       const CVols::CSubStreamInfo &s = Vols.Streams[(unsigned)Vols.StreamIndex];
833       if (!s.Stream)
834       {
835         isFinished = true;
836         return S_OK;
837       }
838       if (_streamPos > s.Size)
839         return S_FALSE;
840       const UInt64 rem = s.Size - _streamPos;
841       if ((UInt64)offset <= rem)
842       {
843         _cnt += offset;
844         return Stream->Seek((Int64)offset, STREAM_SEEK_CUR, &_streamPos);
845       }
846       RINOK(Seek_SavePos(s.Size))
847       offset -= rem;
848       _cnt += rem;
849     }
850 
851     Stream = NULL;
852     _streamPos = 0;
853     Vols.StreamIndex++;
854     if ((unsigned)Vols.StreamIndex >= Vols.Streams.Size())
855     {
856       isFinished = true;
857       return S_OK;
858     }
859     const CVols::CSubStreamInfo &s2 = Vols.Streams[(unsigned)Vols.StreamIndex];
860     if (!s2.Stream)
861     {
862       isFinished = true;
863       return S_OK;
864     }
865     Stream = s2.Stream;
866     RINOK(Seek_SavePos(0))
867   }
868 }
869 
870 
871 
872 /*
873 ---------- LookAhead ----------
874 Reads data to buffer, if required.
875 
876 It can read from volumes as long as Buffer.Size().
877 But it moves to new volume, only if it's required to provide minRequired bytes in buffer.
878 
879 in:
880   (minRequired <= Buffer.Size())
881 
882 return:
883   S_OK : if (GetAvail() < minRequired) after function return, it's end of stream(s) data, or no new volume stream.
884   Error codes: IInStream::Read() error or IInStream::Seek() error for multivol
885 */
886 
LookAhead(size_t minRequired)887 HRESULT CInArchive::LookAhead(size_t minRequired)
888 {
889   for (;;)
890   {
891     const size_t avail = GetAvail();
892 
893     if (minRequired <= avail)
894       return S_OK;
895 
896     if (_bufPos != 0)
897     {
898       if (avail != 0)
899         memmove(Buffer, Buffer + _bufPos, avail);
900       _bufPos = 0;
901       _bufCached = avail;
902     }
903 
904     const size_t pos = _bufCached;
905     UInt32 processed = 0;
906     HRESULT res = Stream->Read(Buffer + pos, (UInt32)(Buffer.Size() - pos), &processed);
907     _streamPos += processed;
908     _bufCached += processed;
909 
910     if (res != S_OK)
911       return res;
912 
913     if (processed != 0)
914       continue;
915 
916     if (   !IsMultiVol
917         || !CanStartNewVol
918         || Vols.StreamIndex < 0
919         || (unsigned)Vols.StreamIndex + 1 >= Vols.Streams.Size())
920       return S_OK;
921 
922     const CVols::CSubStreamInfo &s = Vols.Streams[(unsigned)Vols.StreamIndex + 1];
923     if (!s.Stream)
924       return S_OK;
925 
926     RINOK(s.SeekToStart())
927 
928     Vols.StreamIndex++;
929     _streamPos = 0;
930     Stream = s.Stream;
931     // Vols.NeedSeek = false;
932   }
933 }
934 
935 
// Exception type: it's thrown by SafeRead(), if the data of requested size can't be read.
class CUnexpectEnd
{
};
937 
938 
939 /*
940 ---------- SafeRead ----------
941 
942 reads data of exact size from stream(s)
943 
944 in:
945   _inBufMode
946   if (CanStartNewVol) it can go to next volume before first byte reading, if there is end of volume data.
947 
948 in, out:
949   _streamPos  :  position in Stream
950   Stream
951   Vols  :  if (IsMultiVol)
952   _cnt
953 
954 out:
955   (CanStartNewVol == false), if some data was read
956 
957 return:
958   S_OK : success reading of requested data
959 
960 exceptions:
961   CSystemException() - stream reading error
962   CUnexpectEnd()  :  could not read data of requested size
963 */
964 
SafeRead(Byte * data,unsigned size)965 void CInArchive::SafeRead(Byte *data, unsigned size)
966 {
967   unsigned processed;
968   HRESULT result = ReadFromCache(data, size, processed);
969   if (result != S_OK)
970     throw CSystemException(result);
971   if (size != processed)
972     throw CUnexpectEnd();
973 }
974 
ReadBuffer(CByteBuffer & buffer,unsigned size)975 void CInArchive::ReadBuffer(CByteBuffer &buffer, unsigned size)
976 {
977   buffer.Alloc(size);
978   if (size != 0)
979     SafeRead(buffer, size);
980 }
981 
982 // Byte CInArchive::ReadByte  () { Byte b;      SafeRead(&b, 1); return b; }
983 // UInt16 CInArchive::ReadUInt16() { Byte buf[2]; SafeRead(buf, 2); return Get16(buf); }
ReadUInt32()984 UInt32 CInArchive::ReadUInt32() { Byte buf[4]; SafeRead(buf, 4); return Get32(buf); }
ReadUInt64()985 UInt64 CInArchive::ReadUInt64() { Byte buf[8]; SafeRead(buf, 8); return Get64(buf); }
986 
ReadSignature()987 void CInArchive::ReadSignature()
988 {
989   CanStartNewVol = true;
990   _signature = ReadUInt32();
991   // CanStartNewVol = false; // it's already changed in SafeRead
992 }
993 
994 
995 // we Skip() inside headers only, so no need for stream change in multivol.
996 
Skip(size_t num)997 void CInArchive::Skip(size_t num)
998 {
999   while (num != 0)
1000   {
1001     const unsigned kBufSize = (size_t)1 << 10;
1002     Byte buf[kBufSize];
1003     unsigned step = kBufSize;
1004     if (step > num)
1005       step = (unsigned)num;
1006     SafeRead(buf, step);
1007     num -= step;
1008   }
1009 }
1010 
1011 /*
1012 HRESULT CInArchive::Callback_Completed(unsigned numFiles)
1013 {
1014   const UInt64 numFiles64 = numFiles;
1015   return Callback->SetCompleted(&numFiles64, &_cnt);
1016 }
1017 */
1018 
Skip64(UInt64 num,unsigned numFiles)1019 HRESULT CInArchive::Skip64(UInt64 num, unsigned numFiles)
1020 {
1021   if (num == 0)
1022     return S_OK;
1023 
1024   for (;;)
1025   {
1026     size_t step = (size_t)1 << 24;
1027     if (step > num)
1028       step = (size_t)num;
1029     Skip(step);
1030     num -= step;
1031     if (num == 0)
1032       return S_OK;
1033     if (Callback)
1034     {
1035       const UInt64 numFiles64 = numFiles;
1036       RINOK(Callback->SetCompleted(&numFiles64, &_cnt))
1037     }
1038   }
1039 }
1040 
1041 
ReadFileName(unsigned size,AString & s)1042 bool CInArchive::ReadFileName(unsigned size, AString &s)
1043 {
1044   if (size == 0)
1045   {
1046     s.Empty();
1047     return true;
1048   }
1049   char *p = s.GetBuf(size);
1050   SafeRead((Byte *)p, size);
1051   unsigned i = size;
1052   do
1053   {
1054     if (p[i - 1] != 0)
1055       break;
1056   }
1057   while (--i);
1058   s.ReleaseBuf_CalcLen(size);
1059   return s.Len() == i;
1060 }
1061 
1062 
1063 #define ZIP64_IS_32_MAX(n) ((n) == 0xFFFFFFFF)
1064 #define ZIP64_IS_16_MAX(n) ((n) == 0xFFFF)
1065 
1066 
ReadExtra(const CLocalItem & item,unsigned extraSize,CExtraBlock & extra,UInt64 & unpackSize,UInt64 & packSize,CItem * cdItem)1067 bool CInArchive::ReadExtra(const CLocalItem &item, unsigned extraSize, CExtraBlock &extra,
1068     UInt64 &unpackSize, UInt64 &packSize,
1069     CItem *cdItem)
1070 {
1071   extra.Clear();
1072 
1073   while (extraSize >= 4)
1074   {
1075     CExtraSubBlock subBlock;
1076     const UInt32 pair = ReadUInt32();
1077     subBlock.ID = (pair & 0xFFFF);
1078     unsigned size = (unsigned)(pair >> 16);
1079     // const unsigned origSize = size;
1080 
1081     extraSize -= 4;
1082 
1083     if (size > extraSize)
1084     {
1085       // it's error in extra
1086       HeadersWarning = true;
1087       extra.Error = true;
1088       Skip(extraSize);
1089       return false;
1090     }
1091 
1092     extraSize -= size;
1093 
1094     if (subBlock.ID == NFileHeader::NExtraID::kZip64)
1095     {
1096       extra.IsZip64 = true;
1097       bool isOK = true;
1098 
1099       if (!cdItem
1100           && size == 16
1101           && !ZIP64_IS_32_MAX(unpackSize)
1102           && !ZIP64_IS_32_MAX(packSize))
1103       {
1104         /* Win10 Explorer's "Send to Zip" for big (3500 MiB) files
1105            creates Zip64 Extra in local file header.
1106            But if both uncompressed and compressed sizes are smaller than 4 GiB,
1107            Win10 doesn't store 0xFFFFFFFF in 32-bit fields as expected by zip specification.
1108            21.04: we ignore these minor errors in Win10 zip archives. */
1109         if (ReadUInt64() != unpackSize)
1110           isOK = false;
1111         if (ReadUInt64() != packSize)
1112           isOK = false;
1113         size = 0;
1114       }
1115       else
1116       {
1117         if (ZIP64_IS_32_MAX(unpackSize))
1118           { if (size < 8) isOK = false; else { size -= 8; unpackSize = ReadUInt64(); }}
1119 
1120         if (isOK && ZIP64_IS_32_MAX(packSize))
1121           { if (size < 8) isOK = false; else { size -= 8; packSize = ReadUInt64(); }}
1122 
1123         if (cdItem)
1124         {
1125           if (isOK)
1126           {
1127             if (ZIP64_IS_32_MAX(cdItem->LocalHeaderPos))
1128               { if (size < 8) isOK = false; else { size -= 8; cdItem->LocalHeaderPos = ReadUInt64(); }}
1129             /*
1130             else if (size == 8)
1131             {
1132               size -= 8;
1133               const UInt64 v = ReadUInt64();
1134               // soong_zip, an AOSP tool (written in the Go) writes incorrect value.
1135               // we can ignore that minor error here
1136               if (v != cdItem->LocalHeaderPos)
1137                 isOK = false; // ignore error
1138               // isOK = false; // force error
1139             }
1140             */
1141           }
1142 
1143           if (isOK && ZIP64_IS_16_MAX(cdItem->Disk))
1144             { if (size < 4) isOK = false; else { size -= 4; cdItem->Disk = ReadUInt32(); }}
1145         }
1146       }
1147 
1148       // we can ignore errors, when some zip archiver still write all fields to zip64 extra in local header
1149       // if (&& (cdItem || !isOK || origSize != 8 * 3 + 4 || size != 8 * 1 + 4))
1150       if (!isOK || size != 0)
1151       {
1152         HeadersWarning = true;
1153         extra.Error = true;
1154         extra.IsZip64_Error = true;
1155       }
1156       Skip(size);
1157     }
1158     else
1159     {
1160       ReadBuffer(subBlock.Data, size);
1161       extra.SubBlocks.Add(subBlock);
1162       if (subBlock.ID == NFileHeader::NExtraID::kIzUnicodeName)
1163       {
1164         if (!subBlock.CheckIzUnicode(item.Name))
1165           extra.Error = true;
1166       }
1167     }
1168   }
1169 
1170   if (extraSize != 0)
1171   {
1172     ExtraMinorError = true;
1173     extra.MinorError = true;
1174     // 7-Zip before 9.31 created incorrect WzAES Extra in folder's local headers.
1175     // so we don't return false, but just set warning flag
1176     // return false;
1177     Skip(extraSize);
1178   }
1179 
1180   return true;
1181 }
1182 
1183 
ReadLocalItem(CItemEx & item)1184 bool CInArchive::ReadLocalItem(CItemEx &item)
1185 {
1186   item.Disk = 0;
1187   if (IsMultiVol && Vols.StreamIndex >= 0)
1188     item.Disk = (UInt32)Vols.StreamIndex;
1189   const unsigned kPureHeaderSize = kLocalHeaderSize - 4;
1190   Byte p[kPureHeaderSize];
1191   SafeRead(p, kPureHeaderSize);
1192   {
1193     unsigned i;
1194     for (i = 0; i < kPureHeaderSize && p[i] == 0; i++);
1195     if (i == kPureHeaderSize)
1196       return false;
1197   }
1198 
1199   item.ExtractVersion.Version = p[0];
1200   item.ExtractVersion.HostOS = p[1];
1201   G16(2, item.Flags);
1202   G16(4, item.Method);
1203   G32(6, item.Time);
1204   G32(10, item.Crc);
1205   G32(14, item.PackSize);
1206   G32(18, item.Size);
1207   const unsigned nameSize = Get16(p + 22);
1208   const unsigned extraSize = Get16(p + 24);
1209   bool isOkName = ReadFileName(nameSize, item.Name);
1210   item.LocalFullHeaderSize = kLocalHeaderSize + (UInt32)nameSize + extraSize;
1211   item.DescriptorWasRead = false;
1212 
1213   /*
1214   if (item.IsDir())
1215     item.Size = 0; // check It
1216   */
1217 
1218   if (extraSize > 0)
1219   {
1220     if (!ReadExtra(item, extraSize, item.LocalExtra, item.Size, item.PackSize, NULL))
1221     {
1222       /* Most of archives are OK for Extra. But there are some rare cases
1223          that have error. And if error in first item, it can't open archive.
1224          So we ignore that error */
1225       // return false;
1226     }
1227   }
1228 
1229   if (!CheckDosTime(item.Time))
1230   {
1231     HeadersWarning = true;
1232     // return false;
1233   }
1234 
1235   if (item.Name.Len() != nameSize)
1236   {
1237     // we support some "bad" zip archives that contain zeros after name
1238     if (!isOkName)
1239       return false;
1240     HeadersWarning = true;
1241   }
1242 
1243   // return item.LocalFullHeaderSize <= ((UInt32)1 << 16);
1244   return true;
1245 }
1246 
1247 
FlagsAreSame(const CItem & i1,const CItem & i2_cd)1248 static bool FlagsAreSame(const CItem &i1, const CItem &i2_cd)
1249 {
1250   if (i1.Method != i2_cd.Method)
1251     return false;
1252 
1253   UInt32 mask = i1.Flags ^ i2_cd.Flags;
1254   if (mask == 0)
1255     return true;
1256   switch (i1.Method)
1257   {
1258     case NFileHeader::NCompressionMethod::kDeflate:
1259       mask &= 0x7FF9;
1260       break;
1261     default:
1262       if (i1.Method <= NFileHeader::NCompressionMethod::kImplode)
1263         mask &= 0x7FFF;
1264   }
1265 
1266   // we can ignore utf8 flag, if name is ascii, or if only cdItem has utf8 flag
1267   if (mask & NFileHeader::NFlags::kUtf8)
1268     if ((i1.Name.IsAscii() && i2_cd.Name.IsAscii())
1269         || (i2_cd.Flags & NFileHeader::NFlags::kUtf8))
1270       mask &= ~NFileHeader::NFlags::kUtf8;
1271 
1272   // some bad archive in rare case can use descriptor without descriptor flag in Central Dir
1273   // if (i1.HasDescriptor())
1274   mask &= ~NFileHeader::NFlags::kDescriptorUsedMask;
1275 
1276   return (mask == 0);
1277 }
1278 
1279 
1280 // #ifdef _WIN32
/* Compares two zero-terminated paths.
   Backslash and forward slash are treated as the same character.
   Returns true, if the paths are equal under that rule. */
static bool AreEqualPaths_IgnoreSlashes(const char *s1, const char *s2)
{
  for (;; s1++, s2++)
  {
    const char a = (*s1 == '\\') ? '/' : *s1;
    const char b = (*s2 == '\\') ? '/' : *s2;
    if (a != b)
      return false;
    if (a == 0)
      return true; // both strings ended at the same position
  }
}
1301 // #endif
1302 
1303 
AreItemsEqual(const CItemEx & localItem,const CItemEx & cdItem)1304 static bool AreItemsEqual(const CItemEx &localItem, const CItemEx &cdItem)
1305 {
1306   if (!FlagsAreSame(localItem, cdItem))
1307     return false;
1308   if (!localItem.HasDescriptor())
1309   {
1310     if (cdItem.PackSize != localItem.PackSize
1311         || cdItem.Size != localItem.Size
1312         || (cdItem.Crc != localItem.Crc && cdItem.Crc != 0)) // some program writes 0 to crc field in central directory
1313       return false;
1314   }
1315   /* pkzip 2.50 creates incorrect archives. It uses
1316        - WIN encoding for name in local header
1317        - OEM encoding for name in central header
1318      We don't support these strange items. */
1319 
1320   /* if (cdItem.Name.Len() != localItem.Name.Len())
1321     return false;
1322   */
1323   if (cdItem.Name != localItem.Name)
1324   {
1325     // #ifdef _WIN32
1326     // some xap files use backslash in central dir items.
1327     // we can ignore such errors in windows, where all slashes are converted to backslashes
1328     unsigned hostOs = cdItem.GetHostOS();
1329 
1330     if (hostOs == NFileHeader::NHostOS::kFAT ||
1331         hostOs == NFileHeader::NHostOS::kNTFS)
1332     {
1333       if (!AreEqualPaths_IgnoreSlashes(cdItem.Name, localItem.Name))
1334       {
1335         // pkzip 2.50 uses DOS encoding in central dir and WIN encoding in local header.
1336         // so we ignore that error
1337         if (hostOs != NFileHeader::NHostOS::kFAT
1338             || cdItem.MadeByVersion.Version < 25
1339             || cdItem.MadeByVersion.Version > 40)
1340           return false;
1341       }
1342     }
1343     /*
1344     else
1345     #endif
1346       return false;
1347     */
1348   }
1349   return true;
1350 }
1351 
1352 
Read_LocalItem_After_CdItem(CItemEx & item,bool & isAvail,bool & headersError)1353 HRESULT CInArchive::Read_LocalItem_After_CdItem(CItemEx &item, bool &isAvail, bool &headersError)
1354 {
1355   isAvail = true;
1356   headersError = false;
1357   if (item.FromLocal)
1358     return S_OK;
1359   try
1360   {
1361     UInt64 offset = item.LocalHeaderPos;
1362 
1363     if (IsMultiVol)
1364     {
1365       if (item.Disk >= Vols.Streams.Size())
1366       {
1367         isAvail = false;
1368         return S_FALSE;
1369       }
1370       Stream = Vols.Streams[item.Disk].Stream;
1371       Vols.StreamIndex = (int)item.Disk;
1372       if (!Stream)
1373       {
1374         isAvail = false;
1375         return S_FALSE;
1376       }
1377     }
1378     else
1379     {
1380       if (UseDisk_in_SingleVol && item.Disk != EcdVolIndex)
1381       {
1382         isAvail = false;
1383         return S_FALSE;
1384       }
1385       Stream = StreamRef;
1386 
1387       offset = (UInt64)((Int64)offset + ArcInfo.Base);
1388       if (ArcInfo.Base < 0 && (Int64)offset < 0)
1389       {
1390         isAvail = false;
1391         return S_FALSE;
1392       }
1393     }
1394 
1395     _inBufMode = false;
1396     RINOK(Seek_SavePos(offset))
1397     InitBuf();
1398     /*
1399     // we can use buf mode with small buffer to reduce
1400     // the number of Read() calls in ReadLocalItem()
1401     _inBufMode = true;
1402     Buffer.Alloc(1 << 10);
1403     if (!Buffer.IsAllocated())
1404       return E_OUTOFMEMORY;
1405     */
1406 
1407     CItemEx localItem;
1408     if (ReadUInt32() != NSignature::kLocalFileHeader)
1409       return S_FALSE;
1410     ReadLocalItem(localItem);
1411     if (!AreItemsEqual(localItem, item))
1412       return S_FALSE;
1413     item.LocalFullHeaderSize = localItem.LocalFullHeaderSize;
1414     item.LocalExtra = localItem.LocalExtra;
1415     if (item.Crc != localItem.Crc && !localItem.HasDescriptor())
1416     {
1417       item.Crc = localItem.Crc;
1418       headersError = true;
1419     }
1420     if ((item.Flags ^ localItem.Flags) & NFileHeader::NFlags::kDescriptorUsedMask)
1421     {
1422       item.Flags = (UInt16)(item.Flags ^ NFileHeader::NFlags::kDescriptorUsedMask);
1423       headersError = true;
1424     }
1425     item.FromLocal = true;
1426   }
1427   catch(...) { return S_FALSE; }
1428   return S_OK;
1429 }
1430 
1431 
1432 /*
1433 ---------- FindDescriptor ----------
1434 
1435 in:
1436   _streamPos : position in Stream
1437   Stream :
1438   Vols : if (IsMultiVol)
1439 
1440 action:
1441   searches descriptor in input stream(s).
1442   sets
1443     item.DescriptorWasRead = true;
1444     item.Size
1445     item.PackSize
1446     item.Crc
1447   if descriptor was found
1448 
1449 out:
1450   S_OK:
1451       if ( item.DescriptorWasRead) : if descriptor was found
1452       if (!item.DescriptorWasRead) : if descriptor was not found : unexpected end of stream(s)
1453 
1454   S_FALSE: if no items or there is just one item with strange properties that doesn't look like a real archive.
1455 
1456   another error code: Callback error.
1457 
1458 exceptions :
1459   CSystemException() : stream reading error
1460 */
1461 
FindDescriptor(CItemEx & item,unsigned numFiles)1462 HRESULT CInArchive::FindDescriptor(CItemEx &item, unsigned numFiles)
1463 {
1464   // const size_t kBufSize = (size_t)1 << 5; // don't increase it too much. It reads data look ahead.
1465 
1466   // Buffer.Alloc(kBufSize);
1467   // Byte *buf = Buffer;
1468 
1469   UInt64 packedSize = 0;
1470 
1471   UInt64 progressPrev = _cnt;
1472 
1473   for (;;)
1474   {
1475     /* appnote specification claims that we must use 64-bit descriptor, if there is zip64 extra.
1476        But some old third-party xps archives used 64-bit descriptor without zip64 extra. */
1477     // unsigned descriptorSize = kDataDescriptorSize64 + kNextSignatureSize;
1478 
1479     // const unsigned kNextSignatureSize = 0;  // we can disable check for next signatuire
1480     const unsigned kNextSignatureSize = 4;  // we check also for signature for next File headear
1481 
1482     const unsigned descriptorSize4 = item.GetDescriptorSize() + kNextSignatureSize;
1483 
1484     if (descriptorSize4 > Buffer.Size()) return E_FAIL;
1485 
1486     // size_t processedSize;
1487     CanStartNewVol = true;
1488     RINOK(LookAhead(descriptorSize4))
1489     const size_t avail = GetAvail();
1490 
1491     if (avail < descriptorSize4)
1492     {
1493       // we write to packSize all these available bytes.
1494       // later it's simpler to work with such value than with 0
1495       // if (item.PackSize == 0)
1496         item.PackSize = packedSize + avail;
1497       if (item.Method == 0)
1498         item.Size = item.PackSize;
1499       SkipLookahed(avail);
1500       return S_OK;
1501     }
1502 
1503     const Byte * const pStart = Buffer + _bufPos;
1504     const Byte * p = pStart;
1505     const Byte * const limit = pStart + (avail - descriptorSize4);
1506 
1507     for (; p <= limit; p++)
1508     {
1509       // descriptor signature field is Info-ZIP's extension to pkware Zip specification.
1510       // New ZIP specification also allows descriptorSignature.
1511 
1512       p = FindPK_4(p, limit + 1);
1513       if (p > limit)
1514         break;
1515 
1516       /*
1517       if (*p != 0x50)
1518         continue;
1519       */
1520 
1521       if (Get32(p) != NSignature::kDataDescriptor)
1522         continue;
1523 
1524       // we check next signatuire after descriptor
1525       // maybe we need check only 2 bytes "PK" instead of 4 bytes, if some another type of header is possible after descriptor
1526       const UInt32 sig = Get32(p + descriptorSize4 - kNextSignatureSize);
1527       if (   sig != NSignature::kLocalFileHeader
1528           && sig != NSignature::kCentralFileHeader)
1529         continue;
1530 
1531       const UInt64 packSizeCur = packedSize + (size_t)(p - pStart);
1532       if (descriptorSize4 == kDataDescriptorSize64 + kNextSignatureSize) // if (item.LocalExtra.IsZip64)
1533       {
1534         const UInt64 descriptorPackSize = Get64(p + 8);
1535         if (descriptorPackSize != packSizeCur)
1536           continue;
1537         item.Size = Get64(p + 16);
1538       }
1539       else
1540       {
1541         const UInt32 descriptorPackSize = Get32(p + 8);
1542         if (descriptorPackSize != (UInt32)packSizeCur)
1543           continue;
1544         item.Size = Get32(p + 12);
1545         // that item.Size can be truncated to 32-bit value here
1546       }
1547       // We write calculated 64-bit packSize, even if descriptor64 was not used
1548       item.PackSize = packSizeCur;
1549 
1550       item.DescriptorWasRead = true;
1551       item.Crc = Get32(p + 4);
1552 
1553       const size_t skip = (size_t)(p - pStart) + descriptorSize4 - kNextSignatureSize;
1554 
1555       SkipLookahed(skip);
1556 
1557       return S_OK;
1558     }
1559 
1560     const size_t skip = (size_t)(p - pStart);
1561     SkipLookahed(skip);
1562 
1563     packedSize += skip;
1564 
1565     if (Callback)
1566     if (_cnt - progressPrev >= ((UInt32)1 << 22))
1567     {
1568       progressPrev = _cnt;
1569       const UInt64 numFiles64 = numFiles;
1570       RINOK(Callback->SetCompleted(&numFiles64, &_cnt))
1571     }
1572   }
1573 }
1574 
1575 
CheckDescriptor(const CItemEx & item)1576 HRESULT CInArchive::CheckDescriptor(const CItemEx &item)
1577 {
1578   if (!item.HasDescriptor())
1579     return S_OK;
1580 
1581   // pkzip's version without descriptor signature is not supported
1582 
1583   bool isFinished = false;
1584   RINOK(IncreaseRealPosition(item.PackSize, isFinished))
1585   if (isFinished)
1586     return S_FALSE;
1587 
1588   /*
1589   if (!IsMultiVol)
1590   {
1591     RINOK(Seek_SavePos(ArcInfo.Base + item.GetDataPosition() + item.PackSize));
1592   }
1593   */
1594 
1595   Byte buf[kDataDescriptorSize64];
1596   try
1597   {
1598     CanStartNewVol = true;
1599     SafeRead(buf, item.GetDescriptorSize());
1600   }
1601   catch (const CSystemException &e) { return e.ErrorCode; }
1602   // catch (const CUnexpectEnd &)
1603   catch(...)
1604   {
1605     return S_FALSE;
1606   }
1607   // RINOK(ReadStream_FALSE(Stream, buf, item.GetDescriptorSize()));
1608 
1609   if (Get32(buf) != NSignature::kDataDescriptor)
1610     return S_FALSE;
1611   UInt32 crc = Get32(buf + 4);
1612   UInt64 packSize, unpackSize;
1613 
1614   if (item.LocalExtra.IsZip64)
1615   {
1616     packSize = Get64(buf + 8);
1617     unpackSize = Get64(buf + 16);
1618   }
1619   else
1620   {
1621     packSize = Get32(buf + 8);
1622     unpackSize = Get32(buf + 12);
1623   }
1624 
1625   if (crc != item.Crc || item.PackSize != packSize || item.Size != unpackSize)
1626     return S_FALSE;
1627   return S_OK;
1628 }
1629 
1630 
Read_LocalItem_After_CdItem_Full(CItemEx & item)1631 HRESULT CInArchive::Read_LocalItem_After_CdItem_Full(CItemEx &item)
1632 {
1633   if (item.FromLocal)
1634     return S_OK;
1635   try
1636   {
1637     bool isAvail = true;
1638     bool headersError = false;
1639     RINOK(Read_LocalItem_After_CdItem(item, isAvail, headersError))
1640     if (headersError)
1641       return S_FALSE;
1642     if (item.HasDescriptor())
1643       return CheckDescriptor(item);
1644   }
1645   catch(...) { return S_FALSE; }
1646   return S_OK;
1647 }
1648 
1649 
ReadCdItem(CItemEx & item)1650 HRESULT CInArchive::ReadCdItem(CItemEx &item)
1651 {
1652   item.FromCentral = true;
1653   Byte p[kCentralHeaderSize - 4];
1654   SafeRead(p, kCentralHeaderSize - 4);
1655 
1656   item.MadeByVersion.Version = p[0];
1657   item.MadeByVersion.HostOS = p[1];
1658   item.ExtractVersion.Version = p[2];
1659   item.ExtractVersion.HostOS = p[3];
1660   G16(4, item.Flags);
1661   G16(6, item.Method);
1662   G32(8, item.Time);
1663   G32(12, item.Crc);
1664   G32(16, item.PackSize);
1665   G32(20, item.Size);
1666   const unsigned nameSize = Get16(p + 24);
1667   const unsigned extraSize = Get16(p + 26);
1668   const unsigned commentSize = Get16(p + 28);
1669   G16(30, item.Disk);
1670   G16(32, item.InternalAttrib);
1671   G32(34, item.ExternalAttrib);
1672   G32(38, item.LocalHeaderPos);
1673   ReadFileName(nameSize, item.Name);
1674 
1675   if (extraSize > 0)
1676     ReadExtra(item, extraSize, item.CentralExtra, item.Size, item.PackSize, &item);
1677 
1678   // May be these strings must be deleted
1679   /*
1680   if (item.IsDir())
1681     item.Size = 0;
1682   */
1683 
1684   ReadBuffer(item.Comment, commentSize);
1685   return S_OK;
1686 }
1687 
1688 
1689 /*
1690 TryEcd64()
1691   (_inBufMode == false) is expected here
1692   so TryEcd64() can't change the Buffer.
1693   if (Ecd64 is not covered by cached region),
1694     TryEcd64() can change cached region ranges (_bufCached, _bufPos) and _streamPos.
1695 */
1696 
TryEcd64(UInt64 offset,CCdInfo & cdInfo)1697 HRESULT CInArchive::TryEcd64(UInt64 offset, CCdInfo &cdInfo)
1698 {
1699   if (offset >= ((UInt64)1 << 63))
1700     return S_FALSE;
1701   Byte buf[kEcd64_FullSize];
1702 
1703   RINOK(SeekToVol(Vols.StreamIndex, offset))
1704   RINOK(ReadFromCache_FALSE(buf, kEcd64_FullSize))
1705 
1706   if (Get32(buf) != NSignature::kEcd64)
1707     return S_FALSE;
1708   UInt64 mainSize = Get64(buf + 4);
1709   if (mainSize < kEcd64_MainSize || mainSize > ((UInt64)1 << 40))
1710     return S_FALSE;
1711   cdInfo.ParseEcd64e(buf + 12);
1712   return S_OK;
1713 }
1714 
1715 
1716 /* FindCd() doesn't use previous cached region,
1717    but it uses Buffer. So it sets new cached region */
1718 
FindCd(bool checkOffsetMode)1719 HRESULT CInArchive::FindCd(bool checkOffsetMode)
1720 {
1721   CCdInfo &cdInfo = Vols.ecd;
1722 
1723   UInt64 endPos;
1724 
1725   // There are no useful data in cache in most cases here.
1726   // So here we don't use cache data from previous operations .
1727 
1728   InitBuf();
1729   RINOK(InStream_GetSize_SeekToEnd(Stream, endPos))
1730   _streamPos = endPos;
1731 
1732   // const UInt32 kBufSizeMax2 = ((UInt32)1 << 16) + kEcdSize + kEcd64Locator_Size + kEcd64_FullSize;
1733   const size_t kBufSizeMax = ((size_t)1 << 17); // must be larger than kBufSizeMax2
1734 
1735   const size_t bufSize = (endPos < kBufSizeMax) ? (size_t)endPos : kBufSizeMax;
1736   if (bufSize < kEcdSize)
1737     return S_FALSE;
1738   // CByteArr byteBuffer(bufSize);
1739 
1740   RINOK(AllocateBuffer(kBufSizeMax))
1741 
1742   RINOK(Seek_SavePos(endPos - bufSize))
1743 
1744   size_t processed = bufSize;
1745   HRESULT res = ReadStream(Stream, Buffer, &processed);
1746   _streamPos += processed;
1747   _bufCached = processed;
1748   _bufPos = 0;
1749   _cnt += processed;
1750   if (res != S_OK)
1751     return res;
1752   if (processed != bufSize)
1753     return S_FALSE;
1754 
1755 
1756   for (size_t i = bufSize - kEcdSize + 1;;)
1757   {
1758     if (i == 0)
1759       return S_FALSE;
1760 
1761     const Byte *buf = Buffer;
1762 
1763     for (;;)
1764     {
1765       i--;
1766       if (buf[i] == 0x50)
1767         break;
1768       if (i == 0)
1769         return S_FALSE;
1770     }
1771 
1772     if (Get32(buf + i) != NSignature::kEcd)
1773       continue;
1774 
1775     cdInfo.ParseEcd32(buf + i);
1776 
1777     if (i >= kEcd64Locator_Size)
1778     {
1779       const size_t locatorIndex = i - kEcd64Locator_Size;
1780       if (Get32(buf + locatorIndex) == NSignature::kEcd64Locator)
1781       {
1782         CLocator locator;
1783         locator.Parse(buf + locatorIndex + 4);
1784         UInt32 numDisks = locator.NumDisks;
1785         // we ignore the error, where some zip creators use (NumDisks == 0)
1786         if (numDisks == 0)
1787           numDisks = 1;
1788         if ((cdInfo.ThisDisk == numDisks - 1 || ZIP64_IS_16_MAX(cdInfo.ThisDisk))
1789             && locator.Ecd64Disk < numDisks)
1790         {
1791           if (locator.Ecd64Disk != cdInfo.ThisDisk && !ZIP64_IS_16_MAX(cdInfo.ThisDisk))
1792             return E_NOTIMPL;
1793 
1794           // Most of the zip64 use fixed size Zip64 ECD
1795           // we try relative backward reading.
1796 
1797           UInt64 absEcd64 = endPos - bufSize + i - (kEcd64Locator_Size + kEcd64_FullSize);
1798 
1799           if (locatorIndex >= kEcd64_FullSize)
1800           if (checkOffsetMode || absEcd64 == locator.Ecd64Offset)
1801           {
1802             const Byte *ecd64 = buf + locatorIndex - kEcd64_FullSize;
1803             if (Get32(ecd64) == NSignature::kEcd64)
1804             {
1805               UInt64 mainEcd64Size = Get64(ecd64 + 4);
1806               if (mainEcd64Size == kEcd64_MainSize)
1807               {
1808                 cdInfo.ParseEcd64e(ecd64 + 12);
1809                 ArcInfo.Base = (Int64)(absEcd64 - locator.Ecd64Offset);
1810                 // ArcInfo.BaseVolIndex = cdInfo.ThisDisk;
1811                 return S_OK;
1812               }
1813             }
1814           }
1815 
1816           // some zip64 use variable size Zip64 ECD.
1817           // we try to use absolute offset from locator.
1818 
1819           if (absEcd64 != locator.Ecd64Offset)
1820           {
1821             if (TryEcd64(locator.Ecd64Offset, cdInfo) == S_OK)
1822             {
1823               ArcInfo.Base = 0;
1824               // ArcInfo.BaseVolIndex = cdInfo.ThisDisk;
1825               return S_OK;
1826             }
1827           }
1828 
1829           // for variable Zip64 ECD with for archives with offset != 0.
1830 
1831           if (checkOffsetMode
1832               && ArcInfo.MarkerPos != 0
1833               && ArcInfo.MarkerPos + locator.Ecd64Offset != absEcd64)
1834           {
1835             if (TryEcd64(ArcInfo.MarkerPos + locator.Ecd64Offset, cdInfo) == S_OK)
1836             {
1837               ArcInfo.Base = (Int64)ArcInfo.MarkerPos;
1838               // ArcInfo.BaseVolIndex = cdInfo.ThisDisk;
1839               return S_OK;
1840             }
1841           }
1842         }
1843       }
1844     }
1845 
1846     // bool isVolMode = (Vols.EndVolIndex != -1);
1847     // UInt32 searchDisk = (isVolMode ? Vols.EndVolIndex : 0);
1848 
1849     if (/* searchDisk == thisDisk && */ cdInfo.CdDisk <= cdInfo.ThisDisk)
1850     {
1851       // if (isVolMode)
1852       {
1853         if (cdInfo.CdDisk != cdInfo.ThisDisk)
1854           return S_OK;
1855       }
1856 
1857       UInt64 absEcdPos = endPos - bufSize + i;
1858       UInt64 cdEnd = cdInfo.Size + cdInfo.Offset;
1859       ArcInfo.Base = 0;
1860       // ArcInfo.BaseVolIndex = cdInfo.ThisDisk;
1861       if (absEcdPos != cdEnd)
1862       {
1863         /*
1864         if (cdInfo.Offset <= 16 && cdInfo.Size != 0)
1865         {
1866           // here we support some rare ZIP files with Central directory at the start
1867           ArcInfo.Base = 0;
1868         }
1869         else
1870         */
1871         ArcInfo.Base = (Int64)(absEcdPos - cdEnd);
1872       }
1873       return S_OK;
1874     }
1875   }
1876 }
1877 
1878 
TryReadCd(CObjectVector<CItemEx> & items,const CCdInfo & cdInfo,UInt64 cdOffset,UInt64 cdSize)1879 HRESULT CInArchive::TryReadCd(CObjectVector<CItemEx> &items, const CCdInfo &cdInfo, UInt64 cdOffset, UInt64 cdSize)
1880 {
1881   items.Clear();
1882   IsCdUnsorted = false;
1883 
1884   // _startLocalFromCd_Disk = (UInt32)(Int32)-1;
1885   // _startLocalFromCd_Offset = (UInt64)(Int64)-1;
1886 
1887   RINOK(SeekToVol(IsMultiVol ? (int)cdInfo.CdDisk : -1, cdOffset))
1888 
1889   _inBufMode = true;
1890   _cnt = 0;
1891 
1892   if (Callback)
1893   {
1894     RINOK(Callback->SetTotal(&cdInfo.NumEntries, IsMultiVol ? &Vols.TotalBytesSize : NULL))
1895   }
1896   UInt64 numFileExpected = cdInfo.NumEntries;
1897   const UInt64 *totalFilesPtr = &numFileExpected;
1898   bool isCorrect_NumEntries = (cdInfo.IsFromEcd64 || numFileExpected >= ((UInt32)1 << 16));
1899 
1900   while (_cnt < cdSize)
1901   {
1902     CanStartNewVol = true;
1903     if (ReadUInt32() != NSignature::kCentralFileHeader)
1904       return S_FALSE;
1905     CanStartNewVol = false;
1906     {
1907       CItemEx cdItem;
1908       RINOK(ReadCdItem(cdItem))
1909 
1910       /*
1911       if (cdItem.Disk < _startLocalFromCd_Disk ||
1912           cdItem.Disk == _startLocalFromCd_Disk &&
1913           cdItem.LocalHeaderPos < _startLocalFromCd_Offset)
1914       {
1915         _startLocalFromCd_Disk = cdItem.Disk;
1916         _startLocalFromCd_Offset = cdItem.LocalHeaderPos;
1917       }
1918       */
1919 
1920       if (items.Size() > 0 && !IsCdUnsorted)
1921       {
1922         const CItemEx &prev = items.Back();
1923         if (cdItem.Disk < prev.Disk
1924             || (cdItem.Disk == prev.Disk &&
1925             cdItem.LocalHeaderPos < prev.LocalHeaderPos))
1926           IsCdUnsorted = true;
1927       }
1928 
1929       items.Add(cdItem);
1930     }
1931     if (Callback && (items.Size() & 0xFFF) == 0)
1932     {
1933       const UInt64 numFiles = items.Size();
1934 
1935       if (numFiles > numFileExpected && totalFilesPtr)
1936       {
1937         if (isCorrect_NumEntries)
1938           totalFilesPtr = NULL;
1939         else
1940           while (numFiles > numFileExpected)
1941             numFileExpected += (UInt32)1 << 16;
1942         RINOK(Callback->SetTotal(totalFilesPtr, NULL))
1943       }
1944 
1945       RINOK(Callback->SetCompleted(&numFiles, &_cnt))
1946     }
1947   }
1948 
1949   CanStartNewVol = true;
1950 
1951   return (_cnt == cdSize) ? S_OK : S_FALSE;
1952 }
1953 
1954 
1955 /*
1956 static int CompareCdItems(void *const *elem1, void *const *elem2, void *)
1957 {
1958   const CItemEx *i1 = *(const CItemEx **)elem1;
1959   const CItemEx *i2 = *(const CItemEx **)elem2;
1960 
1961   if (i1->Disk < i2->Disk) return -1;
1962   if (i1->Disk > i2->Disk) return 1;
1963   if (i1->LocalHeaderPos < i2->LocalHeaderPos) return -1;
1964   if (i1->LocalHeaderPos > i2->LocalHeaderPos) return 1;
1965   if (i1 < i2) return -1;
1966   if (i1 > i2) return 1;
1967   return 0;
1968 }
1969 */
1970 
ReadCd(CObjectVector<CItemEx> & items,UInt32 & cdDisk,UInt64 & cdOffset,UInt64 & cdSize)1971 HRESULT CInArchive::ReadCd(CObjectVector<CItemEx> &items, UInt32 &cdDisk, UInt64 &cdOffset, UInt64 &cdSize)
1972 {
1973   bool checkOffsetMode = true;
1974 
1975   if (IsMultiVol)
1976   {
1977     if (Vols.EndVolIndex == -1)
1978       return S_FALSE;
1979     Stream = Vols.Streams[(unsigned)Vols.EndVolIndex].Stream;
1980     if (!Vols.StartIsZip)
1981       checkOffsetMode = false;
1982   }
1983   else
1984     Stream = StartStream;
1985 
1986   if (!Vols.ecd_wasRead)
1987   {
1988     RINOK(FindCd(checkOffsetMode))
1989   }
1990 
1991   CCdInfo &cdInfo = Vols.ecd;
1992 
1993   HRESULT res = S_FALSE;
1994 
1995   cdSize = cdInfo.Size;
1996   cdOffset = cdInfo.Offset;
1997   cdDisk = cdInfo.CdDisk;
1998 
1999   if (!IsMultiVol)
2000   {
2001     if (cdInfo.ThisDisk != cdInfo.CdDisk)
2002       return S_FALSE;
2003   }
2004 
2005   const UInt64 base = (IsMultiVol ? 0 : (UInt64)ArcInfo.Base);
2006   res = TryReadCd(items, cdInfo, base + cdOffset, cdSize);
2007 
2008   if (res == S_FALSE && !IsMultiVol && base != ArcInfo.MarkerPos)
2009   {
2010     // do we need that additional attempt to read cd?
2011     res = TryReadCd(items, cdInfo, ArcInfo.MarkerPos + cdOffset, cdSize);
2012     if (res == S_OK)
2013       ArcInfo.Base = (Int64)ArcInfo.MarkerPos;
2014   }
2015 
2016   // Some rare case files are unsorted
2017   // items.Sort(CompareCdItems, NULL);
2018   return res;
2019 }
2020 
2021 
FindItem(const CObjectVector<CItemEx> & items,const CItemEx & item)2022 static int FindItem(const CObjectVector<CItemEx> &items, const CItemEx &item)
2023 {
2024   unsigned left = 0, right = items.Size();
2025   for (;;)
2026   {
2027     if (left >= right)
2028       return -1;
2029     const unsigned index = (unsigned)(((size_t)left + (size_t)right) / 2);
2030     const CItemEx &item2 = items[index];
2031     if (item.Disk < item2.Disk)
2032       right = index;
2033     else if (item.Disk > item2.Disk)
2034       left = index + 1;
2035     else if (item.LocalHeaderPos == item2.LocalHeaderPos)
2036       return (int)index;
2037     else if (item.LocalHeaderPos < item2.LocalHeaderPos)
2038       right = index;
2039     else
2040       left = index + 1;
2041   }
2042 }
2043 
IsStrangeItem(const CItem & item)2044 static bool IsStrangeItem(const CItem &item)
2045 {
2046   return item.Name.Len() > (1 << 14) || item.Method > (1 << 8);
2047 }
2048 
2049 
2050 
2051 /*
2052   ---------- ReadLocals ----------
2053 
2054 in:
2055   (_signature == NSignature::kLocalFileHeader)
2056   VirtStreamPos : after _signature : position in Stream
2057   Stream :
2058   Vols : if (IsMultiVol)
2059   (_inBufMode == false)
2060 
2061 action:
2062   it parses local items.
2063 
2064   if ( IsMultiVol) it writes absolute offsets to CItemEx::LocalHeaderPos
2065   if (!IsMultiVol) it writes relative (from ArcInfo.Base) offsets to CItemEx::LocalHeaderPos
2066                later we can correct CItemEx::LocalHeaderPos values, if
2067                some new value for ArcInfo.Base will be detected
2068 out:
2069   S_OK:
2070     (_signature != NSignature::kLocalFileHeader)
2071     _streamPos : after _signature
2072 
2073   S_FALSE: if no items or there is just one item with strange properties that doesn't look like a real archive.
2074 
2075   another error code: stream reading error or Callback error.
2076 
2077   CUnexpectEnd() exception : it's not fatal exception here.
2078       It means that reading was interrupted by unexpected end of input stream,
2079       but some CItemEx items were parsed OK.
2080       We can stop further archive parsing.
2081       But we can use all filled CItemEx items.
2082 */
2083 
ReadLocals(CObjectVector<CItemEx> & items)2084 HRESULT CInArchive::ReadLocals(CObjectVector<CItemEx> &items)
2085 {
2086   items.Clear();
2087 
2088   UInt64 progressPrev = _cnt;
2089 
2090   if (Callback)
2091   {
2092     RINOK(Callback->SetTotal(NULL, IsMultiVol ? &Vols.TotalBytesSize : NULL))
2093   }
2094 
2095   while (_signature == NSignature::kLocalFileHeader)
2096   {
2097     CItemEx item;
2098 
2099     item.LocalHeaderPos = GetVirtStreamPos() - 4;
2100     if (!IsMultiVol)
2101       item.LocalHeaderPos = (UInt64)((Int64)item.LocalHeaderPos - ArcInfo.Base);
2102 
2103     try
2104     {
2105       ReadLocalItem(item);
2106       item.FromLocal = true;
2107       bool isFinished = false;
2108 
2109       if (item.HasDescriptor())
2110       {
2111         RINOK(FindDescriptor(item, items.Size()))
2112         isFinished = !item.DescriptorWasRead;
2113       }
2114       else
2115       {
2116         if (item.PackSize >= ((UInt64)1 << 62))
2117           throw CUnexpectEnd();
2118         RINOK(IncreaseRealPosition(item.PackSize, isFinished))
2119       }
2120 
2121       items.Add(item);
2122 
2123       if (isFinished)
2124         throw CUnexpectEnd();
2125 
2126       ReadSignature();
2127     }
2128     catch (CUnexpectEnd &)
2129     {
2130       if (items.IsEmpty() || (items.Size() == 1 && IsStrangeItem(items[0])))
2131         return S_FALSE;
2132       throw;
2133     }
2134 
2135 
2136     if (Callback)
2137     if ((items.Size() & 0xFF) == 0
2138         || _cnt - progressPrev >= ((UInt32)1 << 22))
2139     {
2140       progressPrev = _cnt;
2141       const UInt64 numFiles = items.Size();
2142       RINOK(Callback->SetCompleted(&numFiles, &_cnt))
2143     }
2144   }
2145 
2146   if (items.Size() == 1 && _signature != NSignature::kCentralFileHeader)
2147     if (IsStrangeItem(items[0]))
2148       return S_FALSE;
2149 
2150   return S_OK;
2151 }
2152 
2153 
2154 
ParseArcName(IArchiveOpenVolumeCallback * volCallback)2155 HRESULT CVols::ParseArcName(IArchiveOpenVolumeCallback *volCallback)
2156 {
2157   UString name;
2158   {
2159     NWindows::NCOM::CPropVariant prop;
2160     RINOK(volCallback->GetProperty(kpidName, &prop))
2161     if (prop.vt != VT_BSTR)
2162       return S_OK;
2163     name = prop.bstrVal;
2164   }
2165 
2166   const int dotPos = name.ReverseFind_Dot();
2167   if (dotPos < 0)
2168     return S_OK;
2169   const UString ext = name.Ptr((unsigned)(dotPos + 1));
2170   name.DeleteFrom((unsigned)(dotPos + 1));
2171 
2172   StartVolIndex = (Int32)(-1);
2173 
2174   if (ext.IsEmpty())
2175     return S_OK;
2176   {
2177     wchar_t c = ext[0];
2178     IsUpperCase = (c >= 'A' && c <= 'Z');
2179     if (ext.IsEqualTo_Ascii_NoCase("zip"))
2180     {
2181       BaseName = name;
2182       StartIsZ = true;
2183       StartIsZip = true;
2184       return S_OK;
2185     }
2186     else if (ext.IsEqualTo_Ascii_NoCase("exe"))
2187     {
2188       /* possible cases:
2189          - exe with zip inside
2190          - sfx: a.exe, a.z02, a.z03,... , a.zip
2191                 a.exe is start volume.
2192          - zip renamed to exe
2193       */
2194 
2195       StartIsExe = true;
2196       BaseName = name;
2197       StartVolIndex = 0;
2198       /* sfx-zip can use both arc.exe and arc.zip
2199          We can open arc.zip, if it was requesed to open arc.exe.
2200          But it's possible that arc.exe and arc.zip are not parts of same archive.
2201          So we can disable such operation */
2202 
2203       // 18.04: we still want to open zip renamed to exe.
2204       /*
2205       {
2206         UString volName = name;
2207         volName += IsUpperCase ? "Z01" : "z01";
2208         {
2209           CMyComPtr<IInStream> stream;
2210           HRESULT res2 = volCallback->GetStream(volName, &stream);
2211           if (res2 == S_OK)
2212             DisableVolsSearch = true;
2213         }
2214       }
2215       */
2216       DisableVolsSearch = true;
2217       return S_OK;
2218     }
2219     else if (ext[0] == 'z' || ext[0] == 'Z')
2220     {
2221       if (ext.Len() < 3)
2222         return S_OK;
2223       const wchar_t *end = NULL;
2224       UInt32 volNum = ConvertStringToUInt32(ext.Ptr(1), &end);
2225       if (*end != 0 || volNum < 1 || volNum > ((UInt32)1 << 30))
2226         return S_OK;
2227       StartVolIndex = (Int32)(volNum - 1);
2228       BaseName = name;
2229       StartIsZ = true;
2230     }
2231     else
2232       return S_OK;
2233   }
2234 
2235   UString volName = BaseName;
2236   volName += (IsUpperCase ? "ZIP" : "zip");
2237 
2238   HRESULT res = volCallback->GetStream(volName, &ZipStream);
2239 
2240   if (res == S_FALSE || !ZipStream)
2241   {
2242     if (MissingName.IsEmpty())
2243     {
2244       MissingZip = true;
2245       MissingName = volName;
2246     }
2247     return S_OK;
2248   }
2249 
2250   return res;
2251 }
2252 
2253 
ReadVols2(IArchiveOpenVolumeCallback * volCallback,unsigned start,int lastDisk,int zipDisk,unsigned numMissingVolsMax,unsigned & numMissingVols)2254 HRESULT CInArchive::ReadVols2(IArchiveOpenVolumeCallback *volCallback,
2255     unsigned start, int lastDisk, int zipDisk, unsigned numMissingVolsMax, unsigned &numMissingVols)
2256 {
2257   if (Vols.DisableVolsSearch)
2258     return S_OK;
2259 
2260   numMissingVols = 0;
2261 
2262   for (unsigned i = start;; i++)
2263   {
2264     if (lastDisk >= 0 && i >= (unsigned)lastDisk)
2265       break;
2266 
2267     if (i < Vols.Streams.Size())
2268       if (Vols.Streams[i].Stream)
2269         continue;
2270 
2271     CMyComPtr<IInStream> stream;
2272 
2273     if ((int)i == zipDisk)
2274     {
2275       stream = Vols.ZipStream;
2276     }
2277     else if ((int)i == Vols.StartVolIndex)
2278     {
2279       stream = StartStream;
2280     }
2281     else
2282     {
2283       UString volName = Vols.BaseName;
2284       {
2285         volName.Add_Char(Vols.IsUpperCase ? 'Z' : 'z');
2286         const unsigned v = i + 1;
2287         if (v < 10)
2288           volName.Add_Char('0');
2289         volName.Add_UInt32(v);
2290       }
2291 
2292       HRESULT res = volCallback->GetStream(volName, &stream);
2293       if (res != S_OK && res != S_FALSE)
2294         return res;
2295       if (res == S_FALSE || !stream)
2296       {
2297         if (i == 0)
2298         {
2299           UString volName_exe = Vols.BaseName;
2300           volName_exe += (Vols.IsUpperCase ? "EXE" : "exe");
2301 
2302           HRESULT res2 = volCallback->GetStream(volName_exe, &stream);
2303           if (res2 != S_OK && res2 != S_FALSE)
2304             return res2;
2305           res = res2;
2306         }
2307       }
2308       if (res == S_FALSE || !stream)
2309       {
2310         if (i == 1 && Vols.StartIsExe)
2311           return S_OK;
2312         if (Vols.MissingName.IsEmpty())
2313           Vols.MissingName = volName;
2314         numMissingVols++;
2315         if (numMissingVols > numMissingVolsMax)
2316           return S_OK;
2317         if (lastDisk == -1 && numMissingVols != 0)
2318           return S_OK;
2319         continue;
2320       }
2321     }
2322 
2323     UInt64 pos, size;
2324     RINOK(InStream_GetPos_GetSize(stream, pos, size))
2325 
2326     while (i >= Vols.Streams.Size())
2327       Vols.Streams.AddNew();
2328 
2329     CVols::CSubStreamInfo &ss = Vols.Streams[i];
2330     Vols.NumVols++;
2331     Vols.TotalBytesSize += size;
2332 
2333     ss.Stream = stream;
2334     ss.Size = size;
2335 
2336     if ((int)i == zipDisk)
2337     {
2338       Vols.EndVolIndex = (int)(Vols.Streams.Size() - 1);
2339       break;
2340     }
2341   }
2342 
2343   return S_OK;
2344 }
2345 
2346 
ReadVols()2347 HRESULT CInArchive::ReadVols()
2348 {
2349   CMyComPtr<IArchiveOpenVolumeCallback> volCallback;
2350 
2351   Callback->QueryInterface(IID_IArchiveOpenVolumeCallback, (void **)&volCallback);
2352   if (!volCallback)
2353     return S_OK;
2354 
2355   RINOK(Vols.ParseArcName(volCallback))
2356 
2357   // const int startZIndex = Vols.StartVolIndex;
2358 
2359   if (!Vols.StartIsZ)
2360   {
2361     if (!Vols.StartIsExe)
2362       return S_OK;
2363   }
2364 
2365   int zipDisk = -1;
2366   int cdDisk = -1;
2367 
2368   if (Vols.StartIsZip)
2369     Vols.ZipStream = StartStream;
2370 
2371   if (Vols.ZipStream)
2372   {
2373     Stream = Vols.ZipStream;
2374 
2375     if (Vols.StartIsZip)
2376       Vols.StreamIndex = -1;
2377     else
2378     {
2379       Vols.StreamIndex = -2;
2380       InitBuf();
2381     }
2382 
2383     HRESULT res = FindCd(true);
2384 
2385     CCdInfo &ecd = Vols.ecd;
2386     if (res == S_OK)
2387     {
2388       zipDisk = (int)ecd.ThisDisk;
2389       Vols.ecd_wasRead = true;
2390 
2391       // if is not multivol or bad multivol, we return to main single stream code
2392       if (ecd.ThisDisk == 0
2393           || ecd.ThisDisk >= ((UInt32)1 << 30)
2394           || ecd.ThisDisk < ecd.CdDisk)
2395         return S_OK;
2396 
2397       cdDisk = (int)ecd.CdDisk;
2398       if (Vols.StartVolIndex < 0)
2399         Vols.StartVolIndex = (Int32)ecd.ThisDisk;
2400       else if ((UInt32)Vols.StartVolIndex >= ecd.ThisDisk)
2401         return S_OK;
2402 
2403       // Vols.StartVolIndex = ecd.ThisDisk;
2404       // Vols.EndVolIndex = ecd.ThisDisk;
2405       unsigned numMissingVols;
2406       if (cdDisk != zipDisk)
2407       {
2408         // get volumes required for cd.
2409         RINOK(ReadVols2(volCallback, (unsigned)cdDisk, zipDisk, zipDisk, 0, numMissingVols))
2410         if (numMissingVols != 0)
2411         {
2412           // cdOK = false;
2413         }
2414       }
2415     }
2416     else if (res != S_FALSE)
2417       return res;
2418   }
2419 
2420   if (Vols.StartVolIndex < 0)
2421   {
2422     // is not mutivol;
2423     return S_OK;
2424   }
2425 
2426   /*
2427   if (!Vols.Streams.IsEmpty())
2428     IsMultiVol = true;
2429   */
2430 
2431   unsigned numMissingVols;
2432 
2433   if (cdDisk != 0)
2434   {
2435     // get volumes that were no requested still
2436     const unsigned kNumMissingVolsMax = 1 << 12;
2437     RINOK(ReadVols2(volCallback, 0, cdDisk < 0 ? -1 : cdDisk, zipDisk, kNumMissingVolsMax, numMissingVols))
2438   }
2439 
2440   // if (Vols.StartVolIndex >= 0)
2441   {
2442     if (Vols.Streams.IsEmpty())
2443       if (Vols.StartVolIndex > (1 << 20))
2444         return S_OK;
2445     if ((unsigned)Vols.StartVolIndex >= Vols.Streams.Size()
2446         || !Vols.Streams[(unsigned)Vols.StartVolIndex].Stream)
2447     {
2448       // we get volumes starting from StartVolIndex, if they we not requested before know the volume index (if FindCd() was ok)
2449       RINOK(ReadVols2(volCallback, (unsigned)Vols.StartVolIndex, zipDisk, zipDisk, 0, numMissingVols))
2450     }
2451   }
2452 
2453   if (Vols.ZipStream)
2454   {
2455     // if there is no another volumes and volumeIndex is too big, we don't use multivol mode
2456     if (Vols.Streams.IsEmpty())
2457       if (zipDisk > (1 << 10))
2458         return S_OK;
2459     if (zipDisk >= 0)
2460     {
2461       // we create item in Streams for ZipStream, if we know the volume index (if FindCd() was ok)
2462       RINOK(ReadVols2(volCallback, (unsigned)zipDisk, zipDisk + 1, zipDisk, 0, numMissingVols))
2463     }
2464   }
2465 
2466   if (!Vols.Streams.IsEmpty())
2467   {
2468     IsMultiVol = true;
2469     /*
2470     if (cdDisk)
2471       IsMultiVol = true;
2472     */
2473     const int startZIndex = Vols.StartVolIndex;
2474     if (startZIndex >= 0)
2475     {
2476       // if all volumes before start volume are OK, we can start parsing from 0
2477       // if there are missing volumes before startZIndex, we start parsing in current startZIndex
2478       if ((unsigned)startZIndex < Vols.Streams.Size())
2479       {
2480         for (unsigned i = 0; i <= (unsigned)startZIndex; i++)
2481           if (!Vols.Streams[i].Stream)
2482           {
2483             Vols.StartParsingVol = startZIndex;
2484             break;
2485           }
2486       }
2487     }
2488   }
2489 
2490   return S_OK;
2491 }
2492 
2493 
2494 
Read(void * data,UInt32 size,UInt32 * processedSize)2495 HRESULT CVols::Read(void *data, UInt32 size, UInt32 *processedSize)
2496 {
2497   if (processedSize)
2498     *processedSize = 0;
2499   if (size == 0)
2500     return S_OK;
2501 
2502   for (;;)
2503   {
2504     if (StreamIndex < 0)
2505       return S_OK;
2506     if ((unsigned)StreamIndex >= Streams.Size())
2507       return S_OK;
2508     const CVols::CSubStreamInfo &s = Streams[(unsigned)StreamIndex];
2509     if (!s.Stream)
2510       return S_FALSE;
2511     if (NeedSeek)
2512     {
2513       RINOK(s.SeekToStart())
2514       NeedSeek = false;
2515     }
2516     UInt32 realProcessedSize = 0;
2517     HRESULT res = s.Stream->Read(data, size, &realProcessedSize);
2518     if (processedSize)
2519       *processedSize = realProcessedSize;
2520     if (res != S_OK)
2521       return res;
2522     if (realProcessedSize != 0)
2523       return res;
2524     StreamIndex++;
2525     NeedSeek = true;
2526   }
2527 }
2528 
// Stream interface wrapper: all work is delegated to Read() of (Vols) object.
Z7_COM7F_IMF(CVolStream::Read(void *data, UInt32 size, UInt32 *processedSize))
{
  const HRESULT res = Vols->Read(data, size, processedSize);
  return res;
}
2533 
2534 
2535 
2536 
/*
  COPY_ECD_ITEM_16(n) / COPY_ECD_ITEM_32(n) :
    copy field (n) from (ecd) to (cdInfo), except of the case when (isZip64)
    is set and ZIP64_IS_16_MAX / ZIP64_IS_32_MAX is true for that field
    (presumably the all-ones placeholder value; the macros are defined elsewhere).
  The names (isZip64), (ecd) and (cdInfo) must be visible at the place of use.
  The macros are used without trailing semicolon.
  The statement is enclosed in braces, so the macro can't catch the (else)
  branch of the code that follows it.
*/
#define COPY_ECD_ITEM_16(n) { if (!isZip64 || !ZIP64_IS_16_MAX(ecd. n)) cdInfo. n = ecd. n; }
#define COPY_ECD_ITEM_32(n) { if (!isZip64 || !ZIP64_IS_32_MAX(ecd. n)) cdInfo. n = ecd. n; }
2539 
2540 
ReadHeaders(CObjectVector<CItemEx> & items)2541 HRESULT CInArchive::ReadHeaders(CObjectVector<CItemEx> &items)
2542 {
2543   // buffer that can be used for cd reading
2544   RINOK(AllocateBuffer(kSeqBufferSize))
2545 
2546   // here we can read small records. So we switch off _inBufMode.
2547   _inBufMode = false;
2548 
2549   HRESULT res = S_OK;
2550 
2551   bool localsWereRead = false;
2552 
2553   /* we try to open archive with the following modes:
2554      1) CD-MODE        : fast mode : we read backward ECD and CD, compare CD items with first Local item.
2555      2) LOCALS-CD-MODE : slow mode, if CD-MODE fails : we sequentially read all Locals and then CD.
2556      Then we read sequentially ECD64, Locator, ECD again at the end.
2557 
2558      - in LOCALS-CD-MODE we use use the following
2559          variables (with real cd properties) to set Base archive offset
2560          and check real cd properties with values from ECD/ECD64.
2561   */
2562 
2563   UInt64 cdSize = 0;
2564   UInt64 cdRelatOffset = 0;
2565   UInt32 cdDisk = 0;
2566 
2567   UInt64 cdAbsOffset = 0;   // absolute cd offset, for LOCALS-CD-MODE only.
2568 
2569 if (Force_ReadLocals_Mode)
2570 {
2571   IsArc = true;
2572   res = S_FALSE; // we will use LOCALS-CD-MODE mode
2573 }
2574 else
2575 {
2576   if (!MarkerIsFound || !MarkerIsSafe)
2577   {
2578     IsArc = true;
2579     res = ReadCd(items, cdDisk, cdRelatOffset, cdSize);
2580     if (res == S_OK)
2581       ReadSignature();
2582     else if (res != S_FALSE)
2583       return res;
2584   }
2585   else  // (MarkerIsFound && MarkerIsSafe)
2586   {
2587 
2588   // _signature must be kLocalFileHeader or kEcd or kEcd64
2589 
2590   SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2 + 4);
2591 
2592   CanStartNewVol = false;
2593 
2594   if (_signature == NSignature::kEcd64)
2595   {
2596     // UInt64 ecd64Offset = GetVirtStreamPos() - 4;
2597     IsZip64 = true;
2598 
2599     {
2600       const UInt64 recordSize = ReadUInt64();
2601       if (recordSize < kEcd64_MainSize)
2602         return S_FALSE;
2603       if (recordSize >= ((UInt64)1 << 62))
2604         return S_FALSE;
2605 
2606       {
2607         const unsigned kBufSize = kEcd64_MainSize;
2608         Byte buf[kBufSize];
2609         SafeRead(buf, kBufSize);
2610         CCdInfo cdInfo;
2611         cdInfo.ParseEcd64e(buf);
2612         if (!cdInfo.IsEmptyArc())
2613           return S_FALSE;
2614       }
2615 
2616       RINOK(Skip64(recordSize - kEcd64_MainSize, 0))
2617     }
2618 
2619     ReadSignature();
2620     if (_signature != NSignature::kEcd64Locator)
2621       return S_FALSE;
2622 
2623     {
2624       const unsigned kBufSize = 16;
2625       Byte buf[kBufSize];
2626       SafeRead(buf, kBufSize);
2627       CLocator locator;
2628       locator.Parse(buf);
2629       if (!locator.IsEmptyArc())
2630         return S_FALSE;
2631     }
2632 
2633     ReadSignature();
2634     if (_signature != NSignature::kEcd)
2635       return S_FALSE;
2636   }
2637 
2638   if (_signature == NSignature::kEcd)
2639   {
2640     // It must be empty archive or backware archive
2641     // we don't support backware archive still
2642 
2643     const unsigned kBufSize = kEcdSize - 4;
2644     Byte buf[kBufSize];
2645     SafeRead(buf, kBufSize);
2646     CEcd ecd;
2647     ecd.Parse(buf);
2648     // if (ecd.cdSize != 0)
2649     // Do we need also to support the case where empty zip archive with PK00 uses cdOffset = 4 ??
2650     if (!ecd.IsEmptyArc())
2651       return S_FALSE;
2652 
2653     ArcInfo.Base = (Int64)ArcInfo.MarkerPos;
2654     IsArc = true; // check it: we need more tests?
2655 
2656     RINOK(SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2))
2657     ReadSignature();
2658   }
2659   else
2660   {
2661     CItemEx firstItem;
2662     try
2663     {
2664       try
2665       {
2666         if (!ReadLocalItem(firstItem))
2667           return S_FALSE;
2668       }
2669       catch(CUnexpectEnd &)
2670       {
2671         return S_FALSE;
2672       }
2673 
2674       IsArc = true;
2675       res = ReadCd(items, cdDisk, cdRelatOffset, cdSize);
2676       if (res == S_OK)
2677         ReadSignature();
2678     }
2679     catch(CUnexpectEnd &) { res = S_FALSE; }
2680 
2681     if (res != S_FALSE && res != S_OK)
2682       return res;
2683 
2684     if (res == S_OK && items.Size() == 0)
2685       res = S_FALSE;
2686 
2687     if (res == S_OK)
2688     {
2689       // we can't read local items here to keep _inBufMode state
2690       if ((Int64)ArcInfo.MarkerPos2 < ArcInfo.Base)
2691         res = S_FALSE;
2692       else
2693       {
2694         firstItem.LocalHeaderPos = (UInt64)((Int64)ArcInfo.MarkerPos2 - ArcInfo.Base);
2695         int index = -1;
2696 
2697         UInt32 min_Disk = (UInt32)(Int32)-1;
2698         UInt64 min_LocalHeaderPos = (UInt64)(Int64)-1;
2699 
2700         if (!IsCdUnsorted)
2701           index = FindItem(items, firstItem);
2702         else
2703         {
2704           FOR_VECTOR (i, items)
2705           {
2706             const CItemEx &cdItem = items[i];
2707             if (cdItem.Disk == firstItem.Disk
2708                 && (cdItem.LocalHeaderPos == firstItem.LocalHeaderPos))
2709               index = (int)i;
2710 
2711             if (i == 0
2712                 || cdItem.Disk < min_Disk
2713                 || (cdItem.Disk == min_Disk && cdItem.LocalHeaderPos < min_LocalHeaderPos))
2714             {
2715               min_Disk = cdItem.Disk;
2716               min_LocalHeaderPos = cdItem.LocalHeaderPos;
2717             }
2718           }
2719         }
2720 
2721         if (index == -1)
2722           res = S_FALSE;
2723         else if (!AreItemsEqual(firstItem, items[(unsigned)index]))
2724           res = S_FALSE;
2725         else
2726         {
2727           ArcInfo.CdWasRead = true;
2728           if (IsCdUnsorted)
2729             ArcInfo.FirstItemRelatOffset = min_LocalHeaderPos;
2730           else
2731             ArcInfo.FirstItemRelatOffset = items[0].LocalHeaderPos;
2732 
2733           // ArcInfo.FirstItemRelatOffset = _startLocalFromCd_Offset;
2734         }
2735       }
2736     }
2737   }
2738   } // (MarkerIsFound && MarkerIsSafe)
2739 
2740 } // (!onlyLocalsMode)
2741 
2742 
2743   CObjectVector<CItemEx> cdItems;
2744 
2745   bool needSetBase = false; // we set needSetBase only for LOCALS_CD_MODE
2746   unsigned numCdItems = items.Size();
2747 
2748   #ifdef ZIP_SELF_CHECK
2749   res = S_FALSE; // if uncommented, it uses additional LOCALS-CD-MODE mode to check the code
2750   #endif
2751 
2752   if (res != S_OK)
2753   {
2754     // ---------- LOCALS-CD-MODE ----------
2755     // CD doesn't match firstItem,
2756     // so we clear items and read Locals and CD.
2757 
2758     items.Clear();
2759     localsWereRead = true;
2760 
2761     HeadersError = false;
2762     HeadersWarning = false;
2763     ExtraMinorError = false;
2764 
2765     /* we can use any mode: with buffer and without buffer
2766          without buffer : skips packed data : fast for big files : slow for small files
2767          with    buffer : reads packed data : slow for big files : fast for small files
2768        Buffer mode is more effective. */
2769     // _inBufMode = false;
2770     _inBufMode = true;
2771     // we could change the buffer size here, if we want smaller Buffer.
2772     // RINOK(ReAllocateBuffer(kSeqBufferSize));
2773     // InitBuf()
2774 
2775     ArcInfo.Base = 0;
2776 
2777    if (!Disable_FindMarker)
2778    {
2779     if (!MarkerIsFound)
2780     {
2781       if (!IsMultiVol)
2782         return S_FALSE;
2783       if (Vols.StartParsingVol != 0)
2784         return S_FALSE;
2785       // if (StartParsingVol == 0) and we didn't find marker, we use default zero marker.
2786       // so we suppose that there is no sfx stub
2787       RINOK(SeekToVol(0, ArcInfo.MarkerPos2))
2788     }
2789     else
2790     {
2791       if (ArcInfo.MarkerPos != 0)
2792       {
2793         /*
2794         If multi-vol or there is (No)Span-marker at start of stream, we set (Base) as 0.
2795         In another caes:
2796           (No)Span-marker is supposed as false positive. So we set (Base) as main marker (MarkerPos2).
2797           The (Base) can be corrected later after ECD reading.
2798           But sfx volume with stub and (No)Span-marker in (!IsMultiVol) mode will have incorrect (Base) here.
2799         */
2800         ArcInfo.Base = (Int64)ArcInfo.MarkerPos2;
2801       }
2802       RINOK(SeekToVol(ArcInfo.MarkerVolIndex, ArcInfo.MarkerPos2))
2803     }
2804    }
2805     _cnt = 0;
2806 
2807     ReadSignature();
2808 
2809     LocalsWereRead = true;
2810 
2811     RINOK(ReadLocals(items))
2812 
2813     if (_signature != NSignature::kCentralFileHeader)
2814     {
2815       // GetVirtStreamPos() - 4
2816       if (items.IsEmpty())
2817         return S_FALSE;
2818 
2819       bool isError = true;
2820 
2821       const UInt32 apkSize = _signature;
2822       const unsigned kApkFooterSize = 16 + 8;
2823       if (apkSize >= kApkFooterSize && apkSize <= (1 << 20))
2824       {
2825         if (ReadUInt32() == 0)
2826         {
2827           CByteBuffer apk;
2828           apk.Alloc(apkSize);
2829           SafeRead(apk, apkSize);
2830           ReadSignature();
2831           const Byte *footer = apk + apkSize - kApkFooterSize;
2832           if (_signature == NSignature::kCentralFileHeader)
2833           if (GetUi64(footer) == apkSize)
2834           if (memcmp(footer + 8, "APK Sig Block 42", 16) == 0)
2835           {
2836             isError = false;
2837             IsApk = true;
2838           }
2839         }
2840       }
2841 
2842       if (isError)
2843       {
2844         NoCentralDir = true;
2845         HeadersError = true;
2846         return S_OK;
2847       }
2848     }
2849 
2850     _inBufMode = true;
2851 
2852     cdAbsOffset = GetVirtStreamPos() - 4;
2853     cdDisk = (UInt32)Vols.StreamIndex;
2854 
2855     #ifdef ZIP_SELF_CHECK
2856     if (!IsMultiVol && _cnt != GetVirtStreamPos() - ArcInfo.MarkerPos2)
2857       return E_FAIL;
2858     #endif
2859 
2860     const UInt64 processedCnt_start = _cnt;
2861 
2862     for (;;)
2863     {
2864       CItemEx cdItem;
2865 
2866       RINOK(ReadCdItem(cdItem))
2867 
2868       cdItems.Add(cdItem);
2869       if (Callback && (cdItems.Size() & 0xFFF) == 0)
2870       {
2871         const UInt64 numFiles = items.Size();
2872         const UInt64 numBytes = _cnt;
2873         RINOK(Callback->SetCompleted(&numFiles, &numBytes))
2874       }
2875       ReadSignature();
2876       if (_signature != NSignature::kCentralFileHeader)
2877         break;
2878     }
2879 
2880     cdSize = _cnt - processedCnt_start;
2881 
2882     #ifdef ZIP_SELF_CHECK
2883     if (!IsMultiVol)
2884     {
2885       if (_cnt != GetVirtStreamPos() - ArcInfo.MarkerPos2)
2886         return E_FAIL;
2887       if (cdSize != (GetVirtStreamPos() - 4) - cdAbsOffset)
2888         return E_FAIL;
2889     }
2890     #endif
2891 
2892     needSetBase = true;
2893     numCdItems = cdItems.Size();
2894     cdRelatOffset = (UInt64)((Int64)cdAbsOffset - ArcInfo.Base);
2895 
2896     if (!cdItems.IsEmpty())
2897     {
2898       ArcInfo.CdWasRead = true;
2899       ArcInfo.FirstItemRelatOffset = cdItems[0].LocalHeaderPos;
2900     }
2901   }
2902 
2903 
2904 
2905   CCdInfo cdInfo;
2906   CLocator locator;
2907   bool isZip64 = false;
2908   const UInt64 ecd64AbsOffset = GetVirtStreamPos() - 4;
2909   int ecd64Disk = -1;
2910 
2911   if (_signature == NSignature::kEcd64)
2912   {
2913     ecd64Disk = Vols.StreamIndex;
2914 
2915     IsZip64 = isZip64 = true;
2916 
2917     {
2918       const UInt64 recordSize = ReadUInt64();
2919       if (recordSize < kEcd64_MainSize
2920           || recordSize >= ((UInt64)1 << 62))
2921       {
2922         HeadersError = true;
2923         return S_OK;
2924       }
2925 
2926       {
2927         const unsigned kBufSize = kEcd64_MainSize;
2928         Byte buf[kBufSize];
2929         SafeRead(buf, kBufSize);
2930         cdInfo.ParseEcd64e(buf);
2931       }
2932 
2933       RINOK(Skip64(recordSize - kEcd64_MainSize, items.Size()))
2934     }
2935 
2936 
2937     ReadSignature();
2938 
2939     if (_signature != NSignature::kEcd64Locator)
2940     {
2941       HeadersError = true;
2942       return S_OK;
2943     }
2944 
2945     {
2946       const unsigned kBufSize = 16;
2947       Byte buf[kBufSize];
2948       SafeRead(buf, kBufSize);
2949       locator.Parse(buf);
2950       // we ignore the error, where some zip creators use (NumDisks == 0)
2951       // if (locator.NumDisks == 0) HeadersWarning = true;
2952     }
2953 
2954     ReadSignature();
2955   }
2956 
2957 
2958   if (_signature != NSignature::kEcd)
2959   {
2960     HeadersError = true;
2961     return S_OK;
2962   }
2963 
2964 
2965   CanStartNewVol = false;
2966 
2967   // ---------- ECD ----------
2968 
2969   CEcd ecd;
2970   {
2971     const unsigned kBufSize = kEcdSize - 4;
2972     Byte buf[kBufSize];
2973     SafeRead(buf, kBufSize);
2974     ecd.Parse(buf);
2975   }
2976 
2977   COPY_ECD_ITEM_16(ThisDisk)
2978   COPY_ECD_ITEM_16(CdDisk)
2979   COPY_ECD_ITEM_16(NumEntries_in_ThisDisk)
2980   COPY_ECD_ITEM_16(NumEntries)
2981   COPY_ECD_ITEM_32(Size)
2982   COPY_ECD_ITEM_32(Offset)
2983 
2984   bool cdOK = true;
2985 
2986   if ((UInt32)cdInfo.Size != (UInt32)cdSize)
2987   {
2988     // return S_FALSE;
2989     cdOK = false;
2990   }
2991 
2992   if (isZip64)
2993   {
2994     if (cdInfo.NumEntries != numCdItems
2995         || cdInfo.Size != cdSize)
2996     {
2997       cdOK = false;
2998     }
2999   }
3000 
3001 
3002   if (IsMultiVol)
3003   {
3004     if (cdDisk != cdInfo.CdDisk)
3005       HeadersError = true;
3006   }
3007   else if (needSetBase && cdOK)
3008   {
3009     const UInt64 oldBase = (UInt64)ArcInfo.Base;
3010     // localsWereRead == true
3011     // ArcInfo.Base == ArcInfo.MarkerPos2
3012     // cdRelatOffset == (cdAbsOffset - ArcInfo.Base)
3013 
3014     if (isZip64)
3015     {
3016       if (ecd64Disk == Vols.StartVolIndex)
3017       {
3018         const Int64 newBase = (Int64)ecd64AbsOffset - (Int64)locator.Ecd64Offset;
3019         if (newBase <= (Int64)ecd64AbsOffset)
3020         {
3021           if (!localsWereRead || newBase <= (Int64)ArcInfo.MarkerPos2)
3022           {
3023             ArcInfo.Base = newBase;
3024             cdRelatOffset = (UInt64)((Int64)cdAbsOffset - newBase);
3025           }
3026           else
3027             cdOK = false;
3028         }
3029       }
3030     }
3031     else if (numCdItems != 0) // we can't use ecd.Offset in empty archive?
3032     {
3033       if ((int)cdDisk == Vols.StartVolIndex)
3034       {
3035         const Int64 newBase = (Int64)cdAbsOffset - (Int64)cdInfo.Offset;
3036         if (newBase <= (Int64)cdAbsOffset)
3037         {
3038           if (!localsWereRead || newBase <= (Int64)ArcInfo.MarkerPos2)
3039           {
3040             // cd can be more accurate, when it points before Locals
3041             // so we change Base and cdRelatOffset
3042             ArcInfo.Base = newBase;
3043             cdRelatOffset = cdInfo.Offset;
3044           }
3045           else
3046           {
3047             // const UInt64 delta = ((UInt64)cdRelatOffset - cdInfo.Offset);
3048             const UInt64 delta = ((UInt64)(newBase - ArcInfo.Base));
3049             if ((UInt32)delta == 0)
3050             {
3051               // we set Overflow32bit mode, only if there is (x<<32) offset
3052               // between real_CD_offset_from_MarkerPos and CD_Offset_in_ECD.
3053               // Base and cdRelatOffset unchanged
3054               Overflow32bit = true;
3055             }
3056             else
3057               cdOK = false;
3058           }
3059         }
3060         else
3061           cdOK = false;
3062       }
3063     }
3064     // cdRelatOffset = cdAbsOffset - ArcInfo.Base;
3065 
3066     if (localsWereRead)
3067     {
3068       const UInt64 delta = (UInt64)((Int64)oldBase - ArcInfo.Base);
3069       if (delta != 0)
3070       {
3071         FOR_VECTOR (i, items)
3072           items[i].LocalHeaderPos += delta;
3073       }
3074     }
3075   }
3076 
3077   if (!cdOK)
3078     HeadersError = true;
3079 
3080   EcdVolIndex = cdInfo.ThisDisk;
3081 
3082   if (!IsMultiVol)
3083   {
3084     if (EcdVolIndex == 0 && Vols.MissingZip && Vols.StartIsExe)
3085     {
3086       Vols.MissingName.Empty();
3087       Vols.MissingZip = false;
3088     }
3089 
3090     if (localsWereRead)
3091     {
3092       if (EcdVolIndex != 0)
3093       {
3094         FOR_VECTOR (i, items)
3095           items[i].Disk = EcdVolIndex;
3096       }
3097     }
3098 
3099     UseDisk_in_SingleVol = true;
3100   }
3101 
3102   if (isZip64)
3103   {
3104     if ((cdInfo.ThisDisk == 0 && ecd64AbsOffset != (UInt64)(ArcInfo.Base + (Int64)locator.Ecd64Offset))
3105         // || cdInfo.NumEntries_in_ThisDisk != numCdItems
3106         || cdInfo.NumEntries != numCdItems
3107         || cdInfo.Size != cdSize
3108         || (cdInfo.Offset != cdRelatOffset && !items.IsEmpty()))
3109     {
3110       HeadersError = true;
3111       return S_OK;
3112     }
3113   }
3114 
3115   if (cdOK && !cdItems.IsEmpty())
3116   {
3117     // ---------- merge Central Directory Items ----------
3118 
3119     CRecordVector<unsigned> items2;
3120 
3121     int nextLocalIndex = 0;
3122 
3123     LocalsCenterMerged = true;
3124 
3125     FOR_VECTOR (i, cdItems)
3126     {
3127       if (Callback)
3128       if ((i & 0x3FFF) == 0)
3129       {
3130         const UInt64 numFiles64 = items.Size() + items2.Size();
3131         RINOK(Callback->SetCompleted(&numFiles64, &_cnt))
3132       }
3133 
3134       const CItemEx &cdItem = cdItems[i];
3135 
3136       int index = -1;
3137 
3138       if (nextLocalIndex != -1)
3139       {
3140         if ((unsigned)nextLocalIndex < items.Size())
3141         {
3142           CItemEx &item = items[(unsigned)nextLocalIndex];
3143           if (item.Disk == cdItem.Disk &&
3144               (item.LocalHeaderPos == cdItem.LocalHeaderPos
3145               || (Overflow32bit && (UInt32)item.LocalHeaderPos == cdItem.LocalHeaderPos)))
3146             index = nextLocalIndex++;
3147           else
3148             nextLocalIndex = -1;
3149         }
3150       }
3151 
3152       if (index == -1)
3153         index = FindItem(items, cdItem);
3154 
3155       // index = -1;
3156 
3157       if (index == -1)
3158       {
3159         items2.Add(i);
3160         HeadersError = true;
3161         continue;
3162       }
3163 
3164       CItemEx &item = items[(unsigned)index];
3165       if (item.Name != cdItem.Name
3166           // || item.Name.Len() != cdItem.Name.Len()
3167           || item.PackSize != cdItem.PackSize
3168           || item.Size != cdItem.Size
3169           // item.ExtractVersion != cdItem.ExtractVersion
3170           || !FlagsAreSame(item, cdItem)
3171           || item.Crc != cdItem.Crc)
3172       {
3173         HeadersError = true;
3174         continue;
3175       }
3176 
3177       // item.Name = cdItem.Name;
3178       item.MadeByVersion = cdItem.MadeByVersion;
3179       item.CentralExtra = cdItem.CentralExtra;
3180       item.InternalAttrib = cdItem.InternalAttrib;
3181       item.ExternalAttrib = cdItem.ExternalAttrib;
3182       item.Comment = cdItem.Comment;
3183       item.FromCentral = cdItem.FromCentral;
3184       // 22.02: we force utf8 flag, if central header has utf8 flag
3185       if (cdItem.Flags & NFileHeader::NFlags::kUtf8)
3186         item.Flags |= NFileHeader::NFlags::kUtf8;
3187     }
3188 
3189     FOR_VECTOR (k, items2)
3190       items.Add(cdItems[items2[k]]);
3191   }
3192 
3193   if (ecd.NumEntries < ecd.NumEntries_in_ThisDisk)
3194     HeadersError = true;
3195 
3196   if (ecd.ThisDisk == 0)
3197   {
3198     // if (isZip64)
3199     {
3200       if (ecd.NumEntries != ecd.NumEntries_in_ThisDisk)
3201         HeadersError = true;
3202     }
3203   }
3204 
3205   if (isZip64)
3206   {
3207     if (cdInfo.NumEntries != items.Size()
3208         || (ecd.NumEntries != items.Size() && ecd.NumEntries != 0xFFFF))
3209       HeadersError = true;
3210   }
3211   else
3212   {
3213     // old 7-zip could store 32-bit number of CD items to 16-bit field.
3214     // if (ecd.NumEntries != items.Size())
3215     if (ecd.NumEntries > items.Size())
3216       HeadersError = true;
3217 
3218     if (cdInfo.NumEntries != numCdItems)
3219     {
3220       if ((UInt16)cdInfo.NumEntries != (UInt16)numCdItems)
3221         HeadersError = true;
3222       else
3223         Cd_NumEntries_Overflow_16bit = true;
3224     }
3225   }
3226 
3227   ReadBuffer(ArcInfo.Comment, ecd.CommentSize);
3228 
3229   _inBufMode = false;
3230 
3231   // DisableBufMode();
3232   // Buffer.Free();
3233   /* we can't clear buf varibles. we need them to calculate PhySize of archive */
3234 
3235   if ((UInt16)cdInfo.NumEntries != (UInt16)numCdItems
3236       || (UInt32)cdInfo.Size != (UInt32)cdSize
3237       || ((UInt32)cdInfo.Offset != (UInt32)cdRelatOffset && !items.IsEmpty()))
3238   {
3239     // return S_FALSE;
3240     HeadersError = true;
3241   }
3242 
3243   #ifdef ZIP_SELF_CHECK
3244   if (localsWereRead)
3245   {
3246     const UInt64 endPos = ArcInfo.MarkerPos2 + _cnt;
3247     if (endPos != (IsMultiVol ? Vols.TotalBytesSize : ArcInfo.FileEndPos))
3248     {
3249       // there are some data after the end of archive or error in code;
3250       return E_FAIL;
3251     }
3252   }
3253   #endif
3254 
3255   // printf("\nOpen OK");
3256   return S_OK;
3257 }
3258 
3259 
3260 
Open(IInStream * stream,const UInt64 * searchLimit,IArchiveOpenCallback * callback,CObjectVector<CItemEx> & items)3261 HRESULT CInArchive::Open(IInStream *stream, const UInt64 *searchLimit,
3262     IArchiveOpenCallback *callback, CObjectVector<CItemEx> &items)
3263 {
3264   items.Clear();
3265 
3266   Close();
3267 
3268   UInt64 startPos;
3269   RINOK(InStream_GetPos(stream, startPos))
3270   RINOK(InStream_GetSize_SeekToEnd(stream, ArcInfo.FileEndPos))
3271   _streamPos = ArcInfo.FileEndPos;
3272 
3273   StartStream = stream;
3274   Stream = stream;
3275   Callback = callback;
3276 
3277   DisableBufMode();
3278 
3279   bool volWasRequested = false;
3280 
3281   if (!Disable_VolsRead)
3282   if (callback
3283       && (startPos == 0 || !searchLimit || *searchLimit != 0))
3284   {
3285     // we try to read volumes only if it's first call (offset == 0) or scan is allowed.
3286     volWasRequested = true;
3287     RINOK(ReadVols())
3288   }
3289 
3290   if (Disable_FindMarker)
3291   {
3292     RINOK(SeekToVol(-1, startPos))
3293     StreamRef = stream;
3294     Stream = stream;
3295     MarkerIsFound = true;
3296     MarkerIsSafe = true;
3297     ArcInfo.MarkerPos = startPos;
3298     ArcInfo.MarkerPos2 = startPos;
3299   }
3300   else
3301   if (IsMultiVol && Vols.StartParsingVol == 0 && (unsigned)Vols.StartParsingVol < Vols.Streams.Size())
3302   {
3303     // only StartParsingVol = 0 is safe search.
3304     RINOK(SeekToVol(0, 0))
3305     // if (Stream)
3306     {
3307       // UInt64 limit = 1 << 22; // for sfx
3308       UInt64 limit = 0; // without sfx
3309 
3310       HRESULT res = FindMarker(&limit);
3311 
3312       if (res == S_OK)
3313       {
3314         MarkerIsFound = true;
3315         MarkerIsSafe = true;
3316       }
3317       else if (res != S_FALSE)
3318         return res;
3319     }
3320   }
3321   else
3322   {
3323     // printf("\nOpen offset = %u\n", (unsigned)startPos);
3324     if (IsMultiVol
3325         && (unsigned)Vols.StartParsingVol < Vols.Streams.Size()
3326         && Vols.Streams[(unsigned)Vols.StartParsingVol].Stream)
3327     {
3328       RINOK(SeekToVol(Vols.StartParsingVol, Vols.StreamIndex == Vols.StartVolIndex ? startPos : 0))
3329     }
3330     else
3331     {
3332       RINOK(SeekToVol(-1, startPos))
3333     }
3334 
3335     // UInt64 limit = 1 << 22;
3336     // HRESULT res = FindMarker(&limit);
3337 
3338     HRESULT res = FindMarker(searchLimit);
3339 
3340     // const UInt64 curPos = GetVirtStreamPos();
3341     const UInt64 curPos = ArcInfo.MarkerPos2 + 4;
3342 
3343     if (res == S_OK)
3344       MarkerIsFound = true;
3345     else if (!IsMultiVol)
3346     {
3347       /*
3348       // if (startPos != 0), probably CD could be already tested with another call with (startPos == 0).
3349       // so we don't want to try to open CD again in that case.
3350       if (startPos != 0)
3351         return res;
3352       // we can try to open CD, if there is no Marker and (startPos == 0).
3353       // is it OK to open such files as ZIP, or big number of false positive, when CD can be find in end of file ?
3354       */
3355       return res;
3356     }
3357 
3358     if (ArcInfo.IsSpanMode && !volWasRequested)
3359     {
3360       RINOK(ReadVols())
3361       if (IsMultiVol && MarkerIsFound && ArcInfo.MarkerVolIndex < 0)
3362         ArcInfo.MarkerVolIndex = Vols.StartVolIndex;
3363     }
3364 
3365     MarkerIsSafe = !IsMultiVol
3366         || (ArcInfo.MarkerVolIndex == 0 && ArcInfo.MarkerPos == 0)
3367         ;
3368 
3369 
3370     if (IsMultiVol)
3371     {
3372       if ((unsigned)Vols.StartVolIndex < Vols.Streams.Size())
3373       {
3374         Stream = Vols.Streams[(unsigned)Vols.StartVolIndex].Stream;
3375         if (Stream)
3376         {
3377           RINOK(Seek_SavePos(curPos))
3378         }
3379         else
3380           IsMultiVol = false;
3381       }
3382       else
3383         IsMultiVol = false;
3384     }
3385 
3386     if (!IsMultiVol)
3387     {
3388       if (Vols.StreamIndex != -1)
3389       {
3390         Stream = StartStream;
3391         Vols.StreamIndex = -1;
3392         InitBuf();
3393         RINOK(Seek_SavePos(curPos))
3394       }
3395 
3396       ArcInfo.MarkerVolIndex = -1;
3397       StreamRef = stream;
3398       Stream = stream;
3399     }
3400   }
3401 
3402 
3403   if (!IsMultiVol)
3404     Vols.ClearRefs();
3405 
3406   {
3407     HRESULT res;
3408     try
3409     {
3410       res = ReadHeaders(items);
3411     }
3412     catch (const CSystemException &e) { res = e.ErrorCode; }
3413     catch (const CUnexpectEnd &)
3414     {
3415       if (items.IsEmpty())
3416         return S_FALSE;
3417       UnexpectedEnd = true;
3418       res = S_OK;
3419     }
3420     catch (...)
3421     {
3422       DisableBufMode();
3423       throw;
3424     }
3425 
3426     if (IsMultiVol)
3427     {
3428       ArcInfo.FinishPos = ArcInfo.FileEndPos;
3429       if ((unsigned)Vols.StreamIndex < Vols.Streams.Size())
3430         if (GetVirtStreamPos() < Vols.Streams[(unsigned)Vols.StreamIndex].Size)
3431           ArcInfo.ThereIsTail = true;
3432     }
3433     else
3434     {
3435       ArcInfo.FinishPos = GetVirtStreamPos();
3436       ArcInfo.ThereIsTail = (ArcInfo.FileEndPos > ArcInfo.FinishPos);
3437     }
3438 
3439     DisableBufMode();
3440 
3441     IsArcOpen = true;
3442     if (!IsMultiVol)
3443       Vols.Streams.Clear();
3444     return res;
3445   }
3446 }
3447 
3448 
GetItemStream(const CItemEx & item,bool seekPackData,CMyComPtr<ISequentialInStream> & stream)3449 HRESULT CInArchive::GetItemStream(const CItemEx &item, bool seekPackData, CMyComPtr<ISequentialInStream> &stream)
3450 {
3451   stream.Release();
3452 
3453   UInt64 pos = item.LocalHeaderPos;
3454   if (seekPackData)
3455     pos += item.LocalFullHeaderSize;
3456 
3457   if (!IsMultiVol)
3458   {
3459     if (UseDisk_in_SingleVol && item.Disk != EcdVolIndex)
3460       return S_OK;
3461     pos = (UInt64)((Int64)pos + ArcInfo.Base);
3462     RINOK(InStream_SeekSet(StreamRef, pos))
3463     stream = StreamRef;
3464     return S_OK;
3465   }
3466 
3467   if (item.Disk >= Vols.Streams.Size())
3468     return S_OK;
3469 
3470   IInStream *str2 = Vols.Streams[item.Disk].Stream;
3471   if (!str2)
3472     return S_OK;
3473   RINOK(InStream_SeekSet(str2, pos))
3474 
3475   Vols.NeedSeek = false;
3476   Vols.StreamIndex = (int)item.Disk;
3477 
3478   CVolStream *volsStreamSpec = new CVolStream;
3479   volsStreamSpec->Vols = &Vols;
3480   stream = volsStreamSpec;
3481 
3482   return S_OK;
3483 }
3484 
3485 }}
3486