1 // Archive/WimIn.h
2
3 #ifndef ZIP7_INC_ARCHIVE_WIM_IN_H
4 #define ZIP7_INC_ARCHIVE_WIM_IN_H
5
6 #include "../../../../C/Alloc.h"
7
8 #include "../../../Common/AutoPtr.h"
9 #include "../../../Common/MyBuffer.h"
10 #include "../../../Common/MyXml.h"
11
12 #include "../../../Windows/PropVariant.h"
13
14 #include "../../Compress/CopyCoder.h"
15 #include "../../Compress/LzmsDecoder.h"
16 #include "../../Compress/LzxDecoder.h"
17
18 #include "../IArchive.h"
19
20 namespace NArchive {
21 namespace NWim {
22
23 /*
24 WIM versions:
25 hexVer : headerSize : ver
26 : 1.07.01 - 1.08.01 : Longhorn.4001-4015 - another header, no signature, CAB compression
27 10900 : 60 : 1.09 : Longhorn.4029-4039 (2003)
28 10A00 : 60 : 1.10 : Longhorn.4083 (2004) image starting from 1
29 10B00 : ?? : 1.11 : ??
30 10C00 : 74 : 1.12 : Longhorn.4093 - VistaBeta1.5112 (2005) - (Multi-Part, SHA1)
31 10D00 : D0 : 1.13 : VistaBeta2 - Win10, (NumImages, BootIndex, IntegrityResource)
32 00E00 : D0 : 0.14 : LZMS, solid, esd, dism
33 */
34
// Sizes in bytes of the fixed-length part of a directory entry; they equal the
// hex offsets at which the name array starts in the DIRENTRY_OLD / DIRENTRY
// layouts described in the comment that follows.
const unsigned kDirRecordSizeOld = 0x3E;  // DIRENTRY_OLD (WIM version <= 1.10)
const unsigned kDirRecordSize = 0x66;     // DIRENTRY
37
38 /*
There is an error in the WIM specification regarding the dwReparseTag, dwReparseReserved and liHardLink fields.
40
41 Correct DIRENTRY structure:
42 {
43 hex offset
44 0 UInt64 Len;
45 8 UInt32 Attrib;
46 C UInt32 SecurityId;
47
48 10 UInt64 SubdirOffset; // = 0 for files
49
50 18 UInt64 unused1; // = 0?
51 20 UInt64 unused2; // = 0?
52
53 28 UInt64 CTime;
54 30 UInt64 ATime;
55 38 UInt64 MTime;
56
57 40 Byte Sha1[20];
58
59 54 UInt32 Unknown1; // is it 0 always?
60
61
62 union
63 {
64 58 UInt64 NtNodeId;
65 {
66 58 UInt32 ReparseTag;
67 5C UInt32 ReparseFlags; // is it 0 always? Check with new imagex.
68 }
69 }
70
71 60 UInt16 Streams;
72
73 62 UInt16 ShortNameLen;
74 64 UInt16 FileNameLen;
75
76 66 UInt16 Name[];
77 UInt16 ShortName[];
78 }
79
80 // DIRENTRY for WIM_VERSION <= 1.10
81 DIRENTRY_OLD structure:
82 {
83 hex offset
84 0 UInt64 Len;
85 8 UInt32 Attrib;
86 C UInt32 SecurityId;
87
88 union
89 {
90 10 UInt64 SubdirOffset; //
91
92 10 UInt32 OldWimFileId; // used for files in old WIMs
93 14 UInt32 OldWimFileId_Reserved; // = 0
94 }
95
96 18 UInt64 CTime;
97 20 UInt64 ATime;
98 28 UInt64 MTime;
99
100 30 UInt64 Unknown; // NtNodeId ?
101
102 38 UInt16 Streams;
103 3A UInt16 ShortNameLen;
104 3C UInt16 FileNameLen;
105 3E UInt16 FileName[];
106 UInt16 ShortName[];
107 }
108
109 ALT_STREAM structure:
110 {
111 hex offset
112 0 UInt64 Len;
113 8 UInt64 Unused;
114 10 Byte Sha1[20];
115 24 UInt16 FileNameLen;
116 26 UInt16 FileName[];
117 }
118
119 ALT_STREAM_OLD structure:
120 {
121 hex offset
122 0 UInt64 Len;
123 8 UInt64 StreamId; // 32-bit value
124 10 UInt16 FileNameLen;
125 12 UInt16 FileName[];
126 }
127
If an item is a file (not a directory) and it has alternative streams,
the Streams array contains an additional ALT_STREAM item for the main "unnamed" stream.
130
131 */
132
133
134 namespace NResourceFlags
135 {
136 // const Byte kFree = 1 << 0;
137 const Byte kMetadata = 1 << 1;
138 const Byte kCompressed = 1 << 2;
139 // const Byte kSpanned = 1 << 3;
140 const Byte kSolid = 1 << 4;
141 }
142
143 const UInt64 k_SolidBig_Resource_Marker = (UInt64)1 << 32;
144
145 struct CResource
146 {
147 UInt64 PackSize;
148 UInt64 Offset;
149 UInt64 UnpackSize;
150 Byte Flags;
151 bool KeepSolid;
152 int SolidIndex;
153
ClearCResource154 void Clear()
155 {
156 PackSize = 0;
157 Offset = 0;
158 UnpackSize = 0;
159 Flags = 0;
160 KeepSolid = false;
161 SolidIndex = -1;
162 }
163
GetEndLimitCResource164 UInt64 GetEndLimit() const { return Offset + PackSize; }
165 void Parse(const Byte *p);
ParseAndUpdatePhySizeCResource166 void ParseAndUpdatePhySize(const Byte *p, UInt64 &phySize)
167 {
168 Parse(p);
169 UInt64 v = GetEndLimit();
170 if (phySize < v)
171 phySize = v;
172 }
173
174 void WriteTo(Byte *p) const;
175
IsMetadataCResource176 bool IsMetadata() const { return (Flags & NResourceFlags::kMetadata) != 0; }
IsCompressedCResource177 bool IsCompressed() const { return (Flags & NResourceFlags::kCompressed) != 0; }
IsSolidCResource178 bool IsSolid() const { return (Flags & NResourceFlags::kSolid) != 0; }
IsSolidBigCResource179 bool IsSolidBig() const { return IsSolid() && UnpackSize == k_SolidBig_Resource_Marker; }
IsSolidSmallCResource180 bool IsSolidSmall() const { return IsSolid() && UnpackSize == 0; }
181
IsEmptyCResource182 bool IsEmpty() const { return (UnpackSize == 0); }
183 };
184
185
186 struct CSolid
187 {
188 unsigned StreamIndex;
189 // unsigned NumRefs;
190 int FirstSmallStream;
191
192 UInt64 SolidOffset;
193
194 UInt64 UnpackSize;
195 int Method;
196 unsigned ChunkSizeBits;
197
198 UInt64 HeadersSize;
199 // size_t NumChunks;
200 CObjArray<UInt64> Chunks; // [NumChunks + 1] (start offset)
201
GetChunkPackSizeCSolid202 UInt64 GetChunkPackSize(size_t chunkIndex) const { return Chunks[chunkIndex + 1] - Chunks[chunkIndex]; }
203
CSolidCSolid204 CSolid():
205 FirstSmallStream(-1),
206 // NumRefs(0),
207 Method(-1)
208 {}
209 };
210
211
212 namespace NHeaderFlags
213 {
214 const UInt32 kCompression = 1 << 1;
215 const UInt32 kReadOnly = 1 << 2;
216 const UInt32 kSpanned = 1 << 3;
217 const UInt32 kResourceOnly = 1 << 4;
218 const UInt32 kMetadataOnly = 1 << 5;
219 const UInt32 kWriteInProgress = 1 << 6;
220 const UInt32 kReparsePointFixup = 1 << 7;
221
222 const UInt32 kXPRESS = (UInt32)1 << 17;
223 const UInt32 kLZX = (UInt32)1 << 18;
224 const UInt32 kLZMS = (UInt32)1 << 19;
225 const UInt32 kXPRESS2 = (UInt32)1 << 21; // XPRESS with nonstandard chunk size ?
226
227 const UInt32 kMethodMask = 0xFFFE0000;
228 }
229
230
231 namespace NMethod
232 {
233 const UInt32 kXPRESS = 1;
234 const UInt32 kLZX = 2;
235 const UInt32 kLZMS = 3;
236 }
237
238
239 const UInt32 k_Version_NonSolid = 0x10D00;
240 const UInt32 k_Version_Solid = 0xE00;
241
242 const unsigned kHeaderSizeMax = 0xD0;
243 const unsigned kSignatureSize = 8;
244 extern const Byte kSignature[kSignatureSize];
245
246 const unsigned kChunkSizeBits = 15;
247 const UInt32 kChunkSize = (UInt32)1 << kChunkSizeBits;
248
249
250 struct CHeader
251 {
252 UInt32 Version;
253 UInt32 Flags;
254 UInt32 ChunkSize;
255 unsigned ChunkSizeBits;
256 Byte Guid[16];
257 UInt16 PartNumber;
258 UInt16 NumParts;
259 UInt32 NumImages;
260 UInt32 BootIndex;
261
262 bool _isOldVersion; // 1.10-
263 bool _isNewVersion; // 1.13+ or 0.14
264
265 CResource OffsetResource;
266 CResource XmlResource;
267 CResource MetadataResource;
268 CResource IntegrityResource;
269
270 void SetDefaultFields(bool useLZX);
271
272 void WriteTo(Byte *p) const;
273 HRESULT Parse(const Byte *p, UInt64 &phySize);
274
IsCompressedCHeader275 bool IsCompressed() const { return (Flags & NHeaderFlags::kCompression) != 0; }
276
IsSupportedCHeader277 bool IsSupported() const
278 {
279 return (!IsCompressed()
280 || (Flags & NHeaderFlags::kLZX) != 0
281 || (Flags & NHeaderFlags::kXPRESS) != 0
282 || (Flags & NHeaderFlags::kLZMS) != 0
283 || (Flags & NHeaderFlags::kXPRESS2) != 0);
284 }
285
GetMethodCHeader286 unsigned GetMethod() const
287 {
288 if (!IsCompressed())
289 return 0;
290 UInt32 mask = (Flags & NHeaderFlags::kMethodMask);
291 if (mask == 0) return 0;
292 if (mask == NHeaderFlags::kXPRESS) return NMethod::kXPRESS;
293 if (mask == NHeaderFlags::kLZX) return NMethod::kLZX;
294 if (mask == NHeaderFlags::kLZMS) return NMethod::kLZMS;
295 if (mask == NHeaderFlags::kXPRESS2) return NMethod::kXPRESS;
296 return mask;
297 }
298
IsOldVersionCHeader299 bool IsOldVersion() const { return _isOldVersion; }
IsNewVersionCHeader300 bool IsNewVersion() const { return _isNewVersion; }
IsSolidVersionCHeader301 bool IsSolidVersion() const { return (Version == k_Version_Solid); }
302
AreFromOnArchiveCHeader303 bool AreFromOnArchive(const CHeader &h)
304 {
305 return (memcmp(Guid, h.Guid, sizeof(Guid)) == 0) && (h.NumParts == NumParts);
306 }
307 };
308
309
// Size in bytes of a stored hash; the record layouts above declare the hash
// fields as Sha1[20] (160 bits).
const unsigned kHashSize = 160 / 8;
311
IsEmptySha(const Byte * data)312 inline bool IsEmptySha(const Byte *data)
313 {
314 for (unsigned i = 0; i < kHashSize; i++)
315 if (data[i] != 0)
316 return false;
317 return true;
318 }
319
320 const unsigned kStreamInfoSize = 24 + 2 + 4 + kHashSize;
321
322 struct CStreamInfo
323 {
324 CResource Resource;
325 UInt16 PartNumber; // for NEW WIM format, we set it to 1 for OLD WIM format
326 UInt32 RefCount;
327 UInt32 Id; // for OLD WIM format
328 Byte Hash[kHashSize];
329
IsEmptyHashCStreamInfo330 bool IsEmptyHash() const { return IsEmptySha(Hash); }
331
332 void WriteTo(Byte *p) const;
333 };
334
335
// One entry of the archive item list.
struct CItem
{
  size_t Offset;
  int IndexInSorted;
  int StreamIndex;
  int Parent;
  int ImageIndex; // -1 means that file is unreferenced in Images (deleted item?)
  bool IsDir;
  bool IsAltStream;

  // True if the item is referenced by an image (ImageIndex >= 0).
  bool HasMetadata() const { return ImageIndex >= 0; }

  // Every member gets a defined value, so a default-constructed item can be
  // queried safely: in particular HasMetadata() reads ImageIndex, which was
  // previously left uninitialized together with Offset.
  CItem():
      Offset(0),
      IndexInSorted(-1),
      StreamIndex(-1),
      Parent(-1),
      ImageIndex(-1),
      IsDir(false),
      IsAltStream(false)
    {}
};
356
357 struct CImage
358 {
359 CByteBuffer Meta;
360 CRecordVector<UInt32> SecurOffsets;
361 unsigned StartItem;
362 unsigned NumItems;
363 unsigned NumEmptyRootItems;
364 int VirtualRootIndex; // index in CDatabase::VirtualRoots[]
365 UString RootName;
366 CByteBuffer RootNameBuf;
367
CImageCImage368 CImage(): VirtualRootIndex(-1) {}
369 };
370
371
372 struct CImageInfo
373 {
374 bool CTimeDefined;
375 bool MTimeDefined;
376 bool NameDefined;
377 bool IndexDefined;
378
379 FILETIME CTime;
380 FILETIME MTime;
381 UString Name;
382
383 UInt64 DirCount;
384 UInt64 FileCount;
385 UInt32 Index;
386
387 int ItemIndexInXml;
388
GetTotalFilesAndDirsCImageInfo389 UInt64 GetTotalFilesAndDirs() const { return DirCount + FileCount; }
390
CImageInfoCImageInfo391 CImageInfo(): CTimeDefined(false), MTimeDefined(false), NameDefined(false),
392 IndexDefined(false), ItemIndexInXml(-1) {}
393 void Parse(const CXmlItem &item);
394 };
395
396
397 struct CWimXml
398 {
399 CByteBuffer Data;
400 CXml Xml;
401
402 UInt16 VolIndex;
403 CObjectVector<CImageInfo> Images;
404
405 UString FileName;
406 bool IsEncrypted;
407
GetTotalFilesAndDirsCWimXml408 UInt64 GetTotalFilesAndDirs() const
409 {
410 UInt64 sum = 0;
411 FOR_VECTOR (i, Images)
412 sum += Images[i].GetTotalFilesAndDirs();
413 return sum;
414 }
415
416 void ToUnicode(UString &s);
417 bool Parse();
418
CWimXmlCWimXml419 CWimXml(): IsEncrypted(false) {}
420 };
421
422
423 struct CVolume
424 {
425 CHeader Header;
426 CMyComPtr<IInStream> Stream;
427 };
428
429
430 class CDatabase
431 {
432 Byte *DirData;
433 size_t DirSize;
434 size_t DirProcessed;
435 size_t DirStartOffset;
436 IArchiveOpenCallback *OpenCallback;
437
438 HRESULT ParseDirItem(size_t pos, int parent);
439 HRESULT ParseImageDirs(CByteBuffer &buf, int parent);
440
441 public:
442 CRecordVector<CStreamInfo> DataStreams;
443 CRecordVector<CStreamInfo> MetaStreams;
444
445 CObjectVector<CSolid> Solids;
446
447 CRecordVector<CItem> Items;
448 CObjectVector<CByteBuffer> ReparseItems;
449 CIntVector ItemToReparse; // from index_in_Items to index_in_ReparseItems
450 // -1 means no reparse;
451
452 CObjectVector<CImage> Images;
453
454 bool IsOldVersion9;
455 bool IsOldVersion;
456 bool ThereAreDeletedStreams;
457 bool ThereAreAltStreams;
458 bool RefCountError;
459 bool HeadersError;
460
GetStartImageIndex()461 unsigned GetStartImageIndex() const { return IsOldVersion9 ? 0 : 1; }
GetDirAlignMask()462 unsigned GetDirAlignMask() const { return IsOldVersion9 ? 3 : 7; }
463
464 // User Items can contain all images or just one image from all.
465 CUIntVector SortedItems;
466 int IndexOfUserImage; // -1 : if more than one images was filled to Sorted Items
467
468 unsigned NumExcludededItems;
469 int ExludedItem; // -1 : if there are no exclude items
470 CUIntVector VirtualRoots; // we use them for old 1.10 WIM archives
471
ThereIsError()472 bool ThereIsError() const { return RefCountError || HeadersError; }
473
GetNumUserItemsInImage(unsigned imageIndex)474 unsigned GetNumUserItemsInImage(unsigned imageIndex) const
475 {
476 if (IndexOfUserImage >= 0 && imageIndex != (unsigned)IndexOfUserImage)
477 return 0;
478 if (imageIndex >= Images.Size())
479 return 0;
480 return Images[imageIndex].NumItems - NumExcludededItems;
481 }
482
483 bool ItemHasStream(const CItem &item) const;
484
Get_UnpackSize_of_Resource(const CResource & r)485 UInt64 Get_UnpackSize_of_Resource(const CResource &r) const
486 {
487 if (!r.IsSolid())
488 return r.UnpackSize;
489 if (r.IsSolidSmall())
490 return r.PackSize;
491 if (r.IsSolidBig() && r.SolidIndex >= 0)
492 return Solids[(unsigned)r.SolidIndex].UnpackSize;
493 return 0;
494 }
495
Get_PackSize_of_Resource(unsigned streamIndex)496 UInt64 Get_PackSize_of_Resource(unsigned streamIndex) const
497 {
498 const CResource &r = DataStreams[streamIndex].Resource;
499 if (!r.IsSolidSmall())
500 return r.PackSize;
501 if (r.SolidIndex >= 0)
502 {
503 const CSolid &ss = Solids[(unsigned)r.SolidIndex];
504 if (ss.FirstSmallStream == (int)streamIndex)
505 return DataStreams[ss.StreamIndex].Resource.PackSize;
506 }
507 return 0;
508 }
509
GetUnpackSize()510 UInt64 GetUnpackSize() const
511 {
512 UInt64 res = 0;
513 FOR_VECTOR (i, DataStreams)
514 res += DataStreams[i].Resource.UnpackSize;
515 return res;
516 }
517
GetPackSize()518 UInt64 GetPackSize() const
519 {
520 UInt64 res = 0;
521 FOR_VECTOR (i, DataStreams)
522 res += DataStreams[i].Resource.PackSize;
523 return res;
524 }
525
Clear()526 void Clear()
527 {
528 DataStreams.Clear();
529 MetaStreams.Clear();
530 Solids.Clear();
531
532 Items.Clear();
533 ReparseItems.Clear();
534 ItemToReparse.Clear();
535
536 SortedItems.Clear();
537
538 Images.Clear();
539 VirtualRoots.Clear();
540
541 IsOldVersion = false;
542 ThereAreDeletedStreams = false;
543 ThereAreAltStreams = false;
544 RefCountError = false;
545 HeadersError = false;
546 }
547
CDatabase()548 CDatabase():
549 RefCountError(false),
550 HeadersError(false)
551 {}
552
553 void GetShortName(unsigned index, NWindows::NCOM::CPropVariant &res) const;
554 void GetItemName(unsigned index1, NWindows::NCOM::CPropVariant &res) const;
555 void GetItemPath(unsigned index, bool showImageNumber, NWindows::NCOM::CPropVariant &res) const;
556
557 HRESULT OpenXml(IInStream *inStream, const CHeader &h, CByteBuffer &xml);
558 HRESULT Open(IInStream *inStream, const CHeader &h, unsigned numItemsReserve, IArchiveOpenCallback *openCallback);
559 HRESULT FillAndCheck(const CObjectVector<CVolume> &volumes);
560
561 /*
562 imageIndex showImageNumber NumImages
563 * true * Show Image_Number
564 -1 * >1 Show Image_Number
565 -1 false 1 Don't show Image_Number
566 N false * Don't show Image_Number
567 */
568 HRESULT GenerateSortedItems(int imageIndex, bool showImageNumber);
569
570 HRESULT ExtractReparseStreams(const CObjectVector<CVolume> &volumes, IArchiveOpenCallback *openCallback);
571 };
572
573 HRESULT ReadHeader(IInStream *inStream, CHeader &header, UInt64 &phySize);
574
575
576 struct CMidBuf
577 {
578 Byte *Data;
579 size_t _size;
580
CMidBufCMidBuf581 CMidBuf(): Data(NULL), _size(0) {}
582
EnsureCapacityCMidBuf583 void EnsureCapacity(size_t size)
584 {
585 if (size > _size)
586 {
587 ::z7_AlignedFree(Data);
588 _size = 0;
589 Data = (Byte *)::z7_AlignedAlloc(size);
590 if (Data)
591 _size = size;
592 }
593 }
594
~CMidBufCMidBuf595 ~CMidBuf() { ::z7_AlignedFree(Data); }
596 };
597
598
599 class CUnpacker
600 {
601 CMyComPtr2<ICompressCoder, NCompress::CCopyCoder> copyCoder;
602 CMyUniquePtr<NCompress::NLzx::CDecoder> lzxDecoder;
603 CMyUniquePtr<NCompress::NLzms::CDecoder> lzmsDecoder;
604
605 CByteBuffer sizesBuf;
606
607 CMidBuf packBuf;
608 CMidBuf unpackBuf;
609
610 // solid resource
611 int _solidIndex;
612 size_t _unpackedChunkIndex;
613
614 HRESULT UnpackChunk(
615 ISequentialInStream *inStream,
616 unsigned method, unsigned chunkSizeBits,
617 size_t inSize, size_t outSize,
618 ISequentialOutStream *outStream);
619
620 HRESULT Unpack2(
621 IInStream *inStream,
622 const CResource &res,
623 const CHeader &header,
624 const CDatabase *db,
625 ISequentialOutStream *outStream,
626 ICompressProgressInfo *progress);
627
628 public:
629 UInt64 TotalPacked;
630
CUnpacker()631 CUnpacker():
632 lzmsDecoder(NULL),
633 _solidIndex(-1),
634 _unpackedChunkIndex(0),
635 TotalPacked(0)
636 {}
637
638 HRESULT Unpack(
639 IInStream *inStream,
640 const CResource &res,
641 const CHeader &header,
642 const CDatabase *db,
643 ISequentialOutStream *outStream,
644 ICompressProgressInfo *progress,
645 Byte *digest);
646
647 HRESULT UnpackData(IInStream *inStream,
648 const CResource &resource, const CHeader &header,
649 const CDatabase *db,
650 CByteBuffer &buf, Byte *digest);
651 };
652
653 }}
654
655 #endif
656