xref: /aosp_15_r20/external/lzma/CPP/7zip/Archive/Wim/WimIn.h (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 // Archive/WimIn.h
2 
3 #ifndef ZIP7_INC_ARCHIVE_WIM_IN_H
4 #define ZIP7_INC_ARCHIVE_WIM_IN_H
5 
6 #include "../../../../C/Alloc.h"
7 
8 #include "../../../Common/AutoPtr.h"
9 #include "../../../Common/MyBuffer.h"
10 #include "../../../Common/MyXml.h"
11 
12 #include "../../../Windows/PropVariant.h"
13 
14 #include "../../Compress/CopyCoder.h"
15 #include "../../Compress/LzmsDecoder.h"
16 #include "../../Compress/LzxDecoder.h"
17 
18 #include "../IArchive.h"
19 
20 namespace NArchive {
21 namespace NWim {
22 
23 /*
24 WIM versions:
25 hexVer : headerSize : ver
26  : 1.07.01 - 1.08.01 : Longhorn.4001-4015 - another header, no signature, CAB compression
27 10900 : 60 : 1.09 : Longhorn.4029-4039 (2003)
28 10A00 : 60 : 1.10 : Longhorn.4083 (2004) image starting from 1
29 10B00 : ?? : 1.11 : ??
30 10C00 : 74 : 1.12 : Longhorn.4093 - VistaBeta1.5112 (2005) - (Multi-Part, SHA1)
31 10D00 : D0 : 1.13 : VistaBeta2 - Win10, (NumImages, BootIndex, IntegrityResource)
32 00E00 : D0 : 0.14 : LZMS, solid, esd, dism
33 */
34 
// Fixed-part sizes (in bytes) of directory records; the values match the
// hex offsets of the first variable-length field in the layouts below.
const unsigned kDirRecordSizeOld = 0x3E;  // 62  : DIRENTRY_OLD, FileName[] offset
const unsigned kDirRecordSize    = 0x66;  // 102 : DIRENTRY, Name[] offset
37 
38 /*
  There is an error in the WIM specification regarding the dwReparseTag, dwReparseReserved and liHardLink fields.
40 
41   Correct DIRENTRY structure:
42   {
43     hex offset
44      0    UInt64  Len;
45      8    UInt32  Attrib;
46      C    UInt32  SecurityId;
47 
48     10    UInt64  SubdirOffset; // = 0 for files
49 
50     18    UInt64  unused1; // = 0?
51     20    UInt64  unused2; // = 0?
52 
53     28    UInt64  CTime;
54     30    UInt64  ATime;
55     38    UInt64  MTime;
56 
57     40    Byte    Sha1[20];
58 
59     54    UInt32  Unknown1; // is it 0 always?
60 
61 
62     union
63     {
64     58    UInt64  NtNodeId;
65         {
66     58    UInt32  ReparseTag;
67     5C    UInt32  ReparseFlags; // is it 0 always? Check with new imagex.
68         }
69     }
70 
71     60    UInt16  Streams;
72 
73     62    UInt16  ShortNameLen;
74     64    UInt16  FileNameLen;
75 
76     66    UInt16  Name[];
77           UInt16  ShortName[];
78   }
79 
80   // DIRENTRY for WIM_VERSION <= 1.10
81   DIRENTRY_OLD structure:
82   {
83     hex offset
84      0    UInt64  Len;
85      8    UInt32  Attrib;
86      C    UInt32  SecurityId;
87 
88     union
89     {
90     10    UInt64  SubdirOffset; //
91 
92     10    UInt32  OldWimFileId; // used for files in old WIMs
93     14    UInt32  OldWimFileId_Reserved; // = 0
94     }
95 
96     18    UInt64  CTime;
97     20    UInt64  ATime;
98     28    UInt64  MTime;
99 
100     30    UInt64  Unknown; // NtNodeId ?
101 
102     38    UInt16  Streams;
103     3A    UInt16  ShortNameLen;
104     3C    UInt16  FileNameLen;
105     3E    UInt16  FileName[];
106           UInt16  ShortName[];
107   }
108 
109   ALT_STREAM structure:
110   {
111     hex offset
112      0    UInt64  Len;
113      8    UInt64  Unused;
114     10    Byte    Sha1[20];
115     24    UInt16  FileNameLen;
116     26    UInt16  FileName[];
117   }
118 
119   ALT_STREAM_OLD structure:
120   {
121     hex offset
122      0    UInt64  Len;
123      8    UInt64  StreamId; // 32-bit value
124     10    UInt16  FileNameLen;
125     12    UInt16  FileName[];
126   }
127 
  If an item is a file (not a directory) and it has alternate streams,
  the Streams array also contains an additional ALT_STREAM item for the main "unnamed" stream.
130 
131 */
132 
133 
134 namespace NResourceFlags
135 {
136   // const Byte kFree = 1 << 0;
137   const Byte kMetadata = 1 << 1;
138   const Byte kCompressed = 1 << 2;
139   // const Byte kSpanned = 1 << 3;
140   const Byte kSolid = 1 << 4;
141 }
142 
143 const UInt64 k_SolidBig_Resource_Marker = (UInt64)1 << 32;
144 
145 struct CResource
146 {
147   UInt64 PackSize;
148   UInt64 Offset;
149   UInt64 UnpackSize;
150   Byte Flags;
151   bool KeepSolid;
152   int SolidIndex;
153 
ClearCResource154   void Clear()
155   {
156     PackSize = 0;
157     Offset = 0;
158     UnpackSize = 0;
159     Flags = 0;
160     KeepSolid = false;
161     SolidIndex = -1;
162   }
163 
GetEndLimitCResource164   UInt64 GetEndLimit() const { return Offset + PackSize; }
165   void Parse(const Byte *p);
ParseAndUpdatePhySizeCResource166   void ParseAndUpdatePhySize(const Byte *p, UInt64 &phySize)
167   {
168     Parse(p);
169     UInt64 v = GetEndLimit();
170     if (phySize < v)
171       phySize = v;
172   }
173 
174   void WriteTo(Byte *p) const;
175 
IsMetadataCResource176   bool IsMetadata() const { return (Flags & NResourceFlags::kMetadata) != 0; }
IsCompressedCResource177   bool IsCompressed() const { return (Flags & NResourceFlags::kCompressed) != 0; }
IsSolidCResource178   bool IsSolid() const { return (Flags & NResourceFlags::kSolid) != 0; }
IsSolidBigCResource179   bool IsSolidBig() const { return IsSolid() && UnpackSize == k_SolidBig_Resource_Marker; }
IsSolidSmallCResource180   bool IsSolidSmall() const { return IsSolid() && UnpackSize == 0; }
181 
IsEmptyCResource182   bool IsEmpty() const { return (UnpackSize == 0); }
183 };
184 
185 
186 struct CSolid
187 {
188   unsigned StreamIndex;
189   // unsigned NumRefs;
190   int FirstSmallStream;
191 
192   UInt64 SolidOffset;
193 
194   UInt64 UnpackSize;
195   int Method;
196   unsigned ChunkSizeBits;
197 
198   UInt64 HeadersSize;
199   // size_t NumChunks;
200   CObjArray<UInt64> Chunks; // [NumChunks + 1] (start offset)
201 
GetChunkPackSizeCSolid202   UInt64 GetChunkPackSize(size_t chunkIndex) const { return Chunks[chunkIndex + 1] - Chunks[chunkIndex]; }
203 
CSolidCSolid204   CSolid():
205       FirstSmallStream(-1),
206       // NumRefs(0),
207       Method(-1)
208       {}
209 };
210 
211 
212 namespace NHeaderFlags
213 {
214   const UInt32 kCompression  = 1 << 1;
215   const UInt32 kReadOnly     = 1 << 2;
216   const UInt32 kSpanned      = 1 << 3;
217   const UInt32 kResourceOnly = 1 << 4;
218   const UInt32 kMetadataOnly = 1 << 5;
219   const UInt32 kWriteInProgress = 1 << 6;
220   const UInt32 kReparsePointFixup = 1 << 7;
221 
222   const UInt32 kXPRESS       = (UInt32)1 << 17;
223   const UInt32 kLZX          = (UInt32)1 << 18;
224   const UInt32 kLZMS         = (UInt32)1 << 19;
225   const UInt32 kXPRESS2      = (UInt32)1 << 21; // XPRESS with nonstandard chunk size ?
226 
227   const UInt32 kMethodMask   = 0xFFFE0000;
228 }
229 
230 
231 namespace NMethod
232 {
233   const UInt32 kXPRESS = 1;
234   const UInt32 kLZX    = 2;
235   const UInt32 kLZMS   = 3;
236 }
237 
238 
239 const UInt32 k_Version_NonSolid = 0x10D00;
240 const UInt32 k_Version_Solid = 0xE00;
241 
242 const unsigned kHeaderSizeMax = 0xD0;
243 const unsigned kSignatureSize = 8;
244 extern const Byte kSignature[kSignatureSize];
245 
246 const unsigned kChunkSizeBits = 15;
247 const UInt32 kChunkSize = (UInt32)1 << kChunkSizeBits;
248 
249 
250 struct CHeader
251 {
252   UInt32 Version;
253   UInt32 Flags;
254   UInt32 ChunkSize;
255   unsigned ChunkSizeBits;
256   Byte Guid[16];
257   UInt16 PartNumber;
258   UInt16 NumParts;
259   UInt32 NumImages;
260   UInt32 BootIndex;
261 
262   bool _isOldVersion; // 1.10-
263   bool _isNewVersion; // 1.13+ or 0.14
264 
265   CResource OffsetResource;
266   CResource XmlResource;
267   CResource MetadataResource;
268   CResource IntegrityResource;
269 
270   void SetDefaultFields(bool useLZX);
271 
272   void WriteTo(Byte *p) const;
273   HRESULT Parse(const Byte *p, UInt64 &phySize);
274 
IsCompressedCHeader275   bool IsCompressed() const { return (Flags & NHeaderFlags::kCompression) != 0; }
276 
IsSupportedCHeader277   bool IsSupported() const
278   {
279     return (!IsCompressed()
280         || (Flags & NHeaderFlags::kLZX) != 0
281         || (Flags & NHeaderFlags::kXPRESS) != 0
282         || (Flags & NHeaderFlags::kLZMS) != 0
283         || (Flags & NHeaderFlags::kXPRESS2) != 0);
284   }
285 
GetMethodCHeader286   unsigned GetMethod() const
287   {
288     if (!IsCompressed())
289       return 0;
290     UInt32 mask = (Flags & NHeaderFlags::kMethodMask);
291     if (mask == 0) return 0;
292     if (mask == NHeaderFlags::kXPRESS) return NMethod::kXPRESS;
293     if (mask == NHeaderFlags::kLZX) return NMethod::kLZX;
294     if (mask == NHeaderFlags::kLZMS) return NMethod::kLZMS;
295     if (mask == NHeaderFlags::kXPRESS2) return NMethod::kXPRESS;
296     return mask;
297   }
298 
IsOldVersionCHeader299   bool IsOldVersion() const { return _isOldVersion; }
IsNewVersionCHeader300   bool IsNewVersion() const { return _isNewVersion; }
IsSolidVersionCHeader301   bool IsSolidVersion() const { return (Version == k_Version_Solid); }
302 
AreFromOnArchiveCHeader303   bool AreFromOnArchive(const CHeader &h)
304   {
305     return (memcmp(Guid, h.Guid, sizeof(Guid)) == 0) && (h.NumParts == NumParts);
306   }
307 };
308 
309 
310 const unsigned kHashSize = 20;
311 
IsEmptySha(const Byte * data)312 inline bool IsEmptySha(const Byte *data)
313 {
314   for (unsigned i = 0; i < kHashSize; i++)
315     if (data[i] != 0)
316       return false;
317   return true;
318 }
319 
320 const unsigned kStreamInfoSize = 24 + 2 + 4 + kHashSize;
321 
322 struct CStreamInfo
323 {
324   CResource Resource;
325   UInt16 PartNumber;      // for NEW WIM format, we set it to 1 for OLD WIM format
326   UInt32 RefCount;
327   UInt32 Id;              // for OLD WIM format
328   Byte Hash[kHashSize];
329 
IsEmptyHashCStreamInfo330   bool IsEmptyHash() const { return IsEmptySha(Hash); }
331 
332   void WriteTo(Byte *p) const;
333 };
334 
335 
// One item record of the database.
struct CItem
{
  size_t Offset;
  int IndexInSorted;
  int StreamIndex;
  int Parent;
  int ImageIndex; // -1 means that file is unreferenced in Images (deleted item?)
  bool IsDir;
  bool IsAltStream;

  // True if the item is referenced from an image (ImageIndex >= 0).
  bool HasMetadata() const { return ImageIndex >= 0; }

  // Every member gets a defined value (listed in declaration order).
  // ImageIndex defaults to -1 ("unreferenced"), so HasMetadata() is false
  // for a fresh item instead of reading an indeterminate value.
  CItem():
    Offset(0),
    IndexInSorted(-1),
    StreamIndex(-1),
    Parent(-1),
    ImageIndex(-1),
    IsDir(false),
    IsAltStream(false)
    {}
};
356 
357 struct CImage
358 {
359   CByteBuffer Meta;
360   CRecordVector<UInt32> SecurOffsets;
361   unsigned StartItem;
362   unsigned NumItems;
363   unsigned NumEmptyRootItems;
364   int VirtualRootIndex; // index in CDatabase::VirtualRoots[]
365   UString RootName;
366   CByteBuffer RootNameBuf;
367 
CImageCImage368   CImage(): VirtualRootIndex(-1) {}
369 };
370 
371 
372 struct CImageInfo
373 {
374   bool CTimeDefined;
375   bool MTimeDefined;
376   bool NameDefined;
377   bool IndexDefined;
378 
379   FILETIME CTime;
380   FILETIME MTime;
381   UString Name;
382 
383   UInt64 DirCount;
384   UInt64 FileCount;
385   UInt32 Index;
386 
387   int ItemIndexInXml;
388 
GetTotalFilesAndDirsCImageInfo389   UInt64 GetTotalFilesAndDirs() const { return DirCount + FileCount; }
390 
CImageInfoCImageInfo391   CImageInfo(): CTimeDefined(false), MTimeDefined(false), NameDefined(false),
392       IndexDefined(false), ItemIndexInXml(-1) {}
393   void Parse(const CXmlItem &item);
394 };
395 
396 
397 struct CWimXml
398 {
399   CByteBuffer Data;
400   CXml Xml;
401 
402   UInt16 VolIndex;
403   CObjectVector<CImageInfo> Images;
404 
405   UString FileName;
406   bool IsEncrypted;
407 
GetTotalFilesAndDirsCWimXml408   UInt64 GetTotalFilesAndDirs() const
409   {
410     UInt64 sum = 0;
411     FOR_VECTOR (i, Images)
412       sum += Images[i].GetTotalFilesAndDirs();
413     return sum;
414   }
415 
416   void ToUnicode(UString &s);
417   bool Parse();
418 
CWimXmlCWimXml419   CWimXml(): IsEncrypted(false) {}
420 };
421 
422 
423 struct CVolume
424 {
425   CHeader Header;
426   CMyComPtr<IInStream> Stream;
427 };
428 
429 
430 class CDatabase
431 {
432   Byte *DirData;
433   size_t DirSize;
434   size_t DirProcessed;
435   size_t DirStartOffset;
436   IArchiveOpenCallback *OpenCallback;
437 
438   HRESULT ParseDirItem(size_t pos, int parent);
439   HRESULT ParseImageDirs(CByteBuffer &buf, int parent);
440 
441 public:
442   CRecordVector<CStreamInfo> DataStreams;
443   CRecordVector<CStreamInfo> MetaStreams;
444 
445   CObjectVector<CSolid> Solids;
446 
447   CRecordVector<CItem> Items;
448   CObjectVector<CByteBuffer> ReparseItems;
449   CIntVector ItemToReparse; // from index_in_Items to index_in_ReparseItems
450                             // -1 means no reparse;
451 
452   CObjectVector<CImage> Images;
453 
454   bool IsOldVersion9;
455   bool IsOldVersion;
456   bool ThereAreDeletedStreams;
457   bool ThereAreAltStreams;
458   bool RefCountError;
459   bool HeadersError;
460 
GetStartImageIndex()461   unsigned GetStartImageIndex() const { return IsOldVersion9 ? 0 : 1; }
GetDirAlignMask()462   unsigned GetDirAlignMask() const { return IsOldVersion9 ? 3 : 7; }
463 
464   // User Items can contain all images or just one image from all.
465   CUIntVector SortedItems;
466   int IndexOfUserImage;    // -1 : if more than one images was filled to Sorted Items
467 
468   unsigned NumExcludededItems;
469   int ExludedItem;          // -1 : if there are no exclude items
470   CUIntVector VirtualRoots; // we use them for old 1.10 WIM archives
471 
ThereIsError()472   bool ThereIsError() const { return RefCountError || HeadersError; }
473 
GetNumUserItemsInImage(unsigned imageIndex)474   unsigned GetNumUserItemsInImage(unsigned imageIndex) const
475   {
476     if (IndexOfUserImage >= 0 && imageIndex != (unsigned)IndexOfUserImage)
477       return 0;
478     if (imageIndex >= Images.Size())
479       return 0;
480     return Images[imageIndex].NumItems - NumExcludededItems;
481   }
482 
483   bool ItemHasStream(const CItem &item) const;
484 
Get_UnpackSize_of_Resource(const CResource & r)485   UInt64 Get_UnpackSize_of_Resource(const CResource &r) const
486   {
487     if (!r.IsSolid())
488       return r.UnpackSize;
489     if (r.IsSolidSmall())
490       return r.PackSize;
491     if (r.IsSolidBig() && r.SolidIndex >= 0)
492       return Solids[(unsigned)r.SolidIndex].UnpackSize;
493     return 0;
494   }
495 
Get_PackSize_of_Resource(unsigned streamIndex)496   UInt64 Get_PackSize_of_Resource(unsigned streamIndex) const
497   {
498     const CResource &r = DataStreams[streamIndex].Resource;
499     if (!r.IsSolidSmall())
500       return r.PackSize;
501     if (r.SolidIndex >= 0)
502     {
503       const CSolid &ss = Solids[(unsigned)r.SolidIndex];
504       if (ss.FirstSmallStream == (int)streamIndex)
505         return DataStreams[ss.StreamIndex].Resource.PackSize;
506     }
507     return 0;
508   }
509 
GetUnpackSize()510   UInt64 GetUnpackSize() const
511   {
512     UInt64 res = 0;
513     FOR_VECTOR (i, DataStreams)
514       res += DataStreams[i].Resource.UnpackSize;
515     return res;
516   }
517 
GetPackSize()518   UInt64 GetPackSize() const
519   {
520     UInt64 res = 0;
521     FOR_VECTOR (i, DataStreams)
522       res += DataStreams[i].Resource.PackSize;
523     return res;
524   }
525 
Clear()526   void Clear()
527   {
528     DataStreams.Clear();
529     MetaStreams.Clear();
530     Solids.Clear();
531 
532     Items.Clear();
533     ReparseItems.Clear();
534     ItemToReparse.Clear();
535 
536     SortedItems.Clear();
537 
538     Images.Clear();
539     VirtualRoots.Clear();
540 
541     IsOldVersion = false;
542     ThereAreDeletedStreams = false;
543     ThereAreAltStreams = false;
544     RefCountError = false;
545     HeadersError = false;
546   }
547 
CDatabase()548   CDatabase():
549     RefCountError(false),
550     HeadersError(false)
551     {}
552 
553   void GetShortName(unsigned index, NWindows::NCOM::CPropVariant &res) const;
554   void GetItemName(unsigned index1, NWindows::NCOM::CPropVariant &res) const;
555   void GetItemPath(unsigned index, bool showImageNumber, NWindows::NCOM::CPropVariant &res) const;
556 
557   HRESULT OpenXml(IInStream *inStream, const CHeader &h, CByteBuffer &xml);
558   HRESULT Open(IInStream *inStream, const CHeader &h, unsigned numItemsReserve, IArchiveOpenCallback *openCallback);
559   HRESULT FillAndCheck(const CObjectVector<CVolume> &volumes);
560 
561   /*
562     imageIndex showImageNumber NumImages
563          *        true           *       Show Image_Number
564         -1           *          >1       Show Image_Number
565         -1        false          1       Don't show Image_Number
566          N        false          *       Don't show Image_Number
567   */
568   HRESULT GenerateSortedItems(int imageIndex, bool showImageNumber);
569 
570   HRESULT ExtractReparseStreams(const CObjectVector<CVolume> &volumes, IArchiveOpenCallback *openCallback);
571 };
572 
573 HRESULT ReadHeader(IInStream *inStream, CHeader &header, UInt64 &phySize);
574 
575 
576 struct CMidBuf
577 {
578   Byte *Data;
579   size_t _size;
580 
CMidBufCMidBuf581   CMidBuf(): Data(NULL), _size(0) {}
582 
EnsureCapacityCMidBuf583   void EnsureCapacity(size_t size)
584   {
585     if (size > _size)
586     {
587       ::z7_AlignedFree(Data);
588       _size = 0;
589       Data = (Byte *)::z7_AlignedAlloc(size);
590       if (Data)
591         _size = size;
592     }
593   }
594 
~CMidBufCMidBuf595   ~CMidBuf() { ::z7_AlignedFree(Data); }
596 };
597 
598 
599 class CUnpacker
600 {
601   CMyComPtr2<ICompressCoder, NCompress::CCopyCoder> copyCoder;
602   CMyUniquePtr<NCompress::NLzx::CDecoder> lzxDecoder;
603   CMyUniquePtr<NCompress::NLzms::CDecoder> lzmsDecoder;
604 
605   CByteBuffer sizesBuf;
606 
607   CMidBuf packBuf;
608   CMidBuf unpackBuf;
609 
610   // solid resource
611   int _solidIndex;
612   size_t _unpackedChunkIndex;
613 
614   HRESULT UnpackChunk(
615       ISequentialInStream *inStream,
616       unsigned method, unsigned chunkSizeBits,
617       size_t inSize, size_t outSize,
618       ISequentialOutStream *outStream);
619 
620   HRESULT Unpack2(
621       IInStream *inStream,
622       const CResource &res,
623       const CHeader &header,
624       const CDatabase *db,
625       ISequentialOutStream *outStream,
626       ICompressProgressInfo *progress);
627 
628 public:
629   UInt64 TotalPacked;
630 
CUnpacker()631   CUnpacker():
632       lzmsDecoder(NULL),
633       _solidIndex(-1),
634       _unpackedChunkIndex(0),
635       TotalPacked(0)
636       {}
637 
638   HRESULT Unpack(
639       IInStream *inStream,
640       const CResource &res,
641       const CHeader &header,
642       const CDatabase *db,
643       ISequentialOutStream *outStream,
644       ICompressProgressInfo *progress,
645       Byte *digest);
646 
647   HRESULT UnpackData(IInStream *inStream,
648       const CResource &resource, const CHeader &header,
649       const CDatabase *db,
650       CByteBuffer &buf, Byte *digest);
651 };
652 
653 }}
654 
655 #endif
656