xref: /aosp_15_r20/external/lzma/CPP/7zip/Archive/Chm/ChmIn.cpp (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 // Archive/ChmIn.cpp
2 
3 #include "StdAfx.h"
4 
5 // #include <stdio.h>
6 
7 #include "../../../../C/CpuArch.h"
8 
9 #include "../../../Common/IntToString.h"
10 #include "../../../Common/UTFConvert.h"
11 
12 #include "../../Common/LimitedStreams.h"
13 #include "../../Common/StreamUtils.h"
14 
15 #include "ChmIn.h"
16 
// Short aliases for the GetUi32 / GetUi64 macros; ReadUInt32() and
// ReadUInt64() apply them to raw bytes read from the input buffer.
#define Get32(p) GetUi32(p)
#define Get64(p) GetUi64(p)
19 
20 namespace NArchive {
21 namespace NChm {
22 
// Four-character tags. Each value is the tag's ASCII bytes packed with the
// first character in the least significant byte (e.g. 0x50535449 = "ITSP").

// Tags checked by OpenChm() (ITSP, PMGL) and OpenHighLevel() (LZXC).
static const UInt32 kSignature_ITSP = 0x50535449;
static const UInt32 kSignature_PMGL = 0x4C474D50;
static const UInt32 kSignature_LZXC = 0x43585A4C;

// Tags checked by OpenHelp2().
static const UInt32 kSignature_IFCM = 0x4D434649;
static const UInt32 kSignature_AOLL = 0x4C4C4F41;
static const UInt32 kSignature_CAOL = 0x4C4F4143;

// File-level tags: ITSF starts a CHM file; ITOL followed by ITLS is the
// 8-byte signature that Open2() searches for in Help2 mode.
static const UInt32 kSignature_ITSF = 0x46535449;
static const UInt32 kSignature_ITOL = 0x4C4F5449;
static const UInt32 kSignature_ITLS = 0x534C5449;
34 
// Thrown by the CInArchive read helpers (ReadByte, Skip, ReadBytes,
// ReadUInt16) when the input buffer cannot supply the requested bytes.
struct CEnexpectedEndException {};
// Thrown by ReadEncInt() when an encoded integer does not terminate
// within 9 bytes.
struct CHeaderErrorException {};
37 
38 // define CHM_LOW, if you want to see low level items
39 // #define CHM_LOW
40 
// Raw 16-byte GUID values compared against CMethodInfo::Guid.
// kChmLzxGuid and kHelp2LzxGuid both identify LZX (see CMethodInfo::IsLzx);
// kDesGuid identifies DES (see CMethodInfo::IsDes).
static const Byte kChmLzxGuid[16]   = { 0x40, 0x89, 0xC2, 0x7F, 0x31, 0x9D, 0xD0, 0x11, 0x9B, 0x27, 0x00, 0xA0, 0xC9, 0x1E, 0x9C, 0x7C };
static const Byte kHelp2LzxGuid[16] = { 0xC6, 0x07, 0x90, 0x0A, 0x76, 0x40, 0xD3, 0x11, 0x87, 0x89, 0x00, 0x00, 0xF8, 0x10, 0x57, 0x54 };
static const Byte kDesGuid[16]      = { 0xA2, 0xE4, 0xF6, 0x67, 0xBF, 0x60, 0xD3, 0x11, 0x85, 0x40, 0x00, 0xC0, 0x4F, 0x58, 0xC3, 0xCF };
44 
// Returns true when the two 16-byte GUID buffers hold identical bytes.
static inline bool AreGuidsEqual(const Byte *g1, const Byte *g2)
{
  const unsigned kGuidSize = 16;
  const int diff = memcmp(g1, g2, kGuidSize);
  return diff == 0;
}
49 
PrintByte(unsigned b,AString & s)50 static void PrintByte(unsigned b, AString &s)
51 {
52   s += (char)GET_HEX_CHAR_UPPER(b >> 4);
53   s += (char)GET_HEX_CHAR_LOWER(b & 0xF);
54 }
55 
GetGuidString() const56 AString CMethodInfo::GetGuidString() const
57 {
58   char s[16 * 2 + 8];
59   RawLeGuidToString_Braced(Guid, s);
60   // MyStringUpper_Ascii(s);
61   return (AString)s;
62 }
63 
IsLzx() const64 bool CMethodInfo::IsLzx() const
65 {
66   if (AreGuidsEqual(Guid, kChmLzxGuid))
67     return true;
68   return AreGuidsEqual(Guid, kHelp2LzxGuid);
69 }
70 
IsDes() const71 bool CMethodInfo::IsDes() const
72 {
73   return AreGuidsEqual(Guid, kDesGuid);
74 }
75 
GetName() const76 AString CMethodInfo::GetName() const
77 {
78   AString s;
79   if (IsLzx())
80   {
81     s = "LZX:";
82     s.Add_UInt32(LzxInfo.GetNumDictBits());
83   }
84   else
85   {
86     if (IsDes())
87       s = "DES";
88     else
89     {
90       s = GetGuidString();
91       /*
92       if (ControlData.Size() > 0 && ControlData.Size() <= (1 << 6))
93       {
94         s.Add_Colon();
95         for (size_t i = 0; i < ControlData.Size(); i++)
96           PrintByte(ControlData[i], s);
97       }
98       */
99     }
100   }
101   return s;
102 }
103 
IsLzx() const104 bool CSectionInfo::IsLzx() const
105 {
106   if (Methods.Size() != 1)
107     return false;
108   return Methods[0].IsLzx();
109 }
110 
GetMethodName() const111 UString CSectionInfo::GetMethodName() const
112 {
113   UString s;
114   if (!IsLzx())
115   {
116     UString temp;
117     ConvertUTF8ToUnicode(Name, temp);
118       s += temp;
119     s += ": ";
120   }
121   FOR_VECTOR (i, Methods)
122   {
123     if (i != 0)
124       s.Add_Space();
125     s += Methods[i].GetName();
126   }
127   return s;
128 }
129 
ReadByte()130 Byte CInArchive::ReadByte()
131 {
132   Byte b;
133   if (!_inBuffer.ReadByte(b))
134     throw CEnexpectedEndException();
135   return b;
136 }
137 
Skip(size_t size)138 void CInArchive::Skip(size_t size)
139 {
140   if (_inBuffer.Skip(size) != size)
141     throw CEnexpectedEndException();
142 }
143 
ReadBytes(Byte * data,UInt32 size)144 void CInArchive::ReadBytes(Byte *data, UInt32 size)
145 {
146   if (_inBuffer.ReadBytes(data, size) != size)
147     throw CEnexpectedEndException();
148 }
149 
ReadUInt16()150 UInt16 CInArchive::ReadUInt16()
151 {
152   Byte b0, b1;
153   if (!_inBuffer.ReadByte(b0)) throw CEnexpectedEndException();
154   if (!_inBuffer.ReadByte(b1)) throw CEnexpectedEndException();
155   return (UInt16)(((UInt16)b1 << 8) | b0);
156 }
157 
ReadUInt32()158 UInt32 CInArchive::ReadUInt32()
159 {
160   Byte p[4];
161   ReadBytes(p, 4);
162   return Get32(p);
163 }
164 
ReadUInt64()165 UInt64 CInArchive::ReadUInt64()
166 {
167   Byte p[8];
168   ReadBytes(p, 8);
169   return Get64(p);
170 }
171 
ReadEncInt()172 UInt64 CInArchive::ReadEncInt()
173 {
174   UInt64 val = 0;
175   for (int i = 0; i < 9; i++)
176   {
177     const unsigned b = ReadByte();
178     val |= (b & 0x7F);
179     if (b < 0x80)
180       return val;
181     val <<= 7;
182   }
183   throw CHeaderErrorException();
184 }
185 
ReadGUID(Byte * g)186 void CInArchive::ReadGUID(Byte *g)
187 {
188   ReadBytes(g, 16);
189 }
190 
ReadString(unsigned size,AString & s)191 void CInArchive::ReadString(unsigned size, AString &s)
192 {
193   s.Empty();
194   if (size != 0)
195   {
196     ReadBytes((Byte *)s.GetBuf(size), size);
197     s.ReleaseBuf_CalcLen(size);
198   }
199 }
200 
ReadUString(unsigned size,UString & s)201 void CInArchive::ReadUString(unsigned size, UString &s)
202 {
203   s.Empty();
204   while (size-- != 0)
205   {
206     const wchar_t c = ReadUInt16();
207     if (c == 0)
208     {
209       Skip(2 * size);
210       return;
211     }
212     s += c;
213   }
214 }
215 
ReadChunk(IInStream * inStream,UInt64 pos,UInt64 size)216 HRESULT CInArchive::ReadChunk(IInStream *inStream, UInt64 pos, UInt64 size)
217 {
218   RINOK(InStream_SeekSet(inStream, pos))
219   CLimitedSequentialInStream *streamSpec = new CLimitedSequentialInStream;
220   CMyComPtr<ISequentialInStream> limitedStream(streamSpec);
221   streamSpec->SetStream(inStream);
222   streamSpec->Init(size);
223   m_InStreamRef = limitedStream;
224   _inBuffer.SetStream(limitedStream);
225   _inBuffer.Init();
226   return S_OK;
227 }
228 
ReadDirEntry(CDatabase & database)229 HRESULT CInArchive::ReadDirEntry(CDatabase &database)
230 {
231   CItem item;
232   const UInt64 nameLen = ReadEncInt();
233   if (nameLen == 0 || nameLen > (1 << 13))
234     return S_FALSE;
235   ReadString((unsigned)nameLen, item.Name);
236   item.Section = ReadEncInt();
237   item.Offset = ReadEncInt();
238   item.Size = ReadEncInt();
239   database.Items.Add(item);
240   return S_OK;
241 }
242 
OpenChm(IInStream * inStream,CDatabase & database)243 HRESULT CInArchive::OpenChm(IInStream *inStream, CDatabase &database)
244 {
245   UInt32 headerSize = ReadUInt32();
246   if (headerSize != 0x60)
247     return S_FALSE;
248   database.PhySize = headerSize;
249 
250   UInt32 unknown1 = ReadUInt32();
251   if (unknown1 != 0 && unknown1 != 1) // it's 0 in one .sll file
252     return S_FALSE;
253 
254   IsArc = true;
255 
256   /* UInt32 timeStamp = */ ReadUInt32();
257       // Considered as a big-endian DWORD, it appears to contain seconds (MSB) and
258       // fractional seconds (second byte).
259       // The third and fourth bytes may contain even more fractional bits.
260       // The 4 least significant bits in the last byte are constant.
261   /* UInt32 lang = */ ReadUInt32();
262   Byte g[16];
263   ReadGUID(g); // {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC}
264   ReadGUID(g); // {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC}
265   const unsigned kNumSections = 2;
266   UInt64 sectionOffsets[kNumSections];
267   UInt64 sectionSizes[kNumSections];
268   unsigned i;
269   for (i = 0; i < kNumSections; i++)
270   {
271     sectionOffsets[i] = ReadUInt64();
272     sectionSizes[i] = ReadUInt64();
273     UInt64 end = sectionOffsets[i] + sectionSizes[i];
274     database.UpdatePhySize(end);
275   }
276   // if (chmVersion == 3)
277     database.ContentOffset = ReadUInt64();
278   /*
279   else
280     database.ContentOffset = database.StartPosition + 0x58
281   */
282 
283   // Section 0
284   ReadChunk(inStream, sectionOffsets[0], sectionSizes[0]);
285   if (sectionSizes[0] < 0x18)
286     return S_FALSE;
287   if (ReadUInt32() != 0x01FE)
288     return S_FALSE;
289   ReadUInt32(); // unknown:  0
290   UInt64 fileSize = ReadUInt64();
291   database.UpdatePhySize(fileSize);
292   ReadUInt32(); // unknown:  0
293   ReadUInt32(); // unknown:  0
294 
295   // Section 1: The Directory Listing
296   ReadChunk(inStream, sectionOffsets[1], sectionSizes[1]);
297   if (ReadUInt32() != kSignature_ITSP)
298     return S_FALSE;
299   if (ReadUInt32() != 1) // version
300     return S_FALSE;
301   /* UInt32 dirHeaderSize = */ ReadUInt32();
302   ReadUInt32(); // 0x0A (unknown)
303   UInt32 dirChunkSize = ReadUInt32(); // $1000
304   if (dirChunkSize < 32)
305     return S_FALSE;
306   /* UInt32 density = */ ReadUInt32(); //  "Density" of quickref section, usually 2.
307   /* UInt32 depth = */ ReadUInt32(); //  Depth of the index tree: 1 there is no index,
308                                // 2 if there is one level of PMGI chunks.
309 
310   /* UInt32 chunkNumber = */ ReadUInt32(); //  Chunk number of root index chunk, -1 if there is none
311                                      // (though at least one file has 0 despite there being no
312                                      // index chunk, probably a bug.)
313   /* UInt32 firstPmglChunkNumber = */ ReadUInt32(); // Chunk number of first PMGL (listing) chunk
314   /* UInt32 lastPmglChunkNumber = */ ReadUInt32();  // Chunk number of last PMGL (listing) chunk
315   ReadUInt32(); // -1 (unknown)
316   UInt32 numDirChunks = ReadUInt32(); // Number of directory chunks (total)
317   /* UInt32 windowsLangId = */ ReadUInt32();
318   ReadGUID(g);  // {5D02926A-212E-11D0-9DF9-00A0C922E6EC}
319   ReadUInt32(); // 0x54 (This is the length again)
320   ReadUInt32(); // -1 (unknown)
321   ReadUInt32(); // -1 (unknown)
322   ReadUInt32(); // -1 (unknown)
323 
324   for (UInt32 ci = 0; ci < numDirChunks; ci++)
325   {
326     UInt64 chunkPos = _inBuffer.GetProcessedSize();
327     if (ReadUInt32() == kSignature_PMGL)
328     {
329       // The quickref area is written backwards from the end of the chunk.
330       // One quickref entry exists for every n entries in the file, where n
331       // is calculated as 1 + (1 << quickref density). So for density = 2, n = 5.
332 
333       const UInt32 quickrefLength = ReadUInt32(); // Len of free space and/or quickref area at end of directory chunk
334       if (quickrefLength > dirChunkSize || quickrefLength < 2)
335         return S_FALSE;
336       ReadUInt32(); // Always 0
337       ReadUInt32(); // Chunk number of previous listing chunk when reading
338                     // directory in sequence (-1 if this is the first listing chunk)
339       ReadUInt32(); // Chunk number of next  listing chunk when reading
340                     // directory in sequence (-1 if this is the last listing chunk)
341       unsigned numItems = 0;
342 
343       for (;;)
344       {
345         const UInt64 offset = _inBuffer.GetProcessedSize() - chunkPos;
346         const UInt32 offsetLimit = dirChunkSize - quickrefLength;
347         if (offset > offsetLimit)
348           return S_FALSE;
349         if (offset == offsetLimit)
350           break;
351         RINOK(ReadDirEntry(database))
352         numItems++;
353       }
354 
355       Skip(quickrefLength - 2);
356 
357       const unsigned rrr = ReadUInt16();
358       if (rrr != numItems)
359       {
360         // Lazarus 9-26-2 chm contains 0 here.
361         if (rrr != 0)
362           return S_FALSE;
363       }
364     }
365     else
366       Skip(dirChunkSize - 4);
367   }
368   return S_OK;
369 }
370 
OpenHelp2(IInStream * inStream,CDatabase & database)371 HRESULT CInArchive::OpenHelp2(IInStream *inStream, CDatabase &database)
372 {
373   if (ReadUInt32() != 1) // version
374     return S_FALSE;
375   if (ReadUInt32() != 0x28) // Location of header section table
376     return S_FALSE;
377   UInt32 numHeaderSections = ReadUInt32();
378   const unsigned kNumHeaderSectionsMax = 5;
379   if (numHeaderSections != kNumHeaderSectionsMax)
380     return S_FALSE;
381 
382   IsArc = true;
383 
384   ReadUInt32(); // Len of post-header table
385   Byte g[16];
386   ReadGUID(g);  // {0A9007C1-4076-11D3-8789-0000F8105754}
387 
388   // header section table
389   UInt64 sectionOffsets[kNumHeaderSectionsMax];
390   UInt64 sectionSizes[kNumHeaderSectionsMax];
391   UInt32 i;
392   for (i = 0; i < numHeaderSections; i++)
393   {
394     sectionOffsets[i] = ReadUInt64();
395     sectionSizes[i] = ReadUInt64();
396     UInt64 end = sectionOffsets[i] + sectionSizes[i];
397     database.UpdatePhySize(end);
398   }
399 
400   // Post-Header
401   ReadUInt32(); // 2
402   ReadUInt32(); // 0x98: offset to CAOL from beginning of post-header)
403   // ----- Directory information
404   ReadUInt64(); // Chunk number of top-level AOLI chunk in directory, or -1
405   ReadUInt64(); // Chunk number of first AOLL chunk in directory
406   ReadUInt64(); // Chunk number of last AOLL chunk in directory
407   ReadUInt64(); // 0 (unknown)
408   ReadUInt32(); // $2000 (Directory chunk size of directory)
409   ReadUInt32(); // Quickref density for main directory, usually 2
410   ReadUInt32(); // 0 (unknown)
411   ReadUInt32(); // Depth of main directory index tree
412                 // 1 there is no index, 2 if there is one level of AOLI chunks.
413   ReadUInt64(); // 0 (unknown)
414   UInt64 numDirEntries = ReadUInt64(); // Number of directory entries
415   // ----- Directory Index Information
416   ReadUInt64(); // -1 (unknown, probably chunk number of top-level AOLI in directory index)
417   ReadUInt64(); // Chunk number of first AOLL chunk in directory index
418   ReadUInt64(); // Chunk number of last AOLL chunk in directory index
419   ReadUInt64(); // 0 (unknown)
420   ReadUInt32(); // $200 (Directory chunk size of directory index)
421   ReadUInt32(); // Quickref density for directory index, usually 2
422   ReadUInt32(); // 0 (unknown)
423   ReadUInt32(); // Depth of directory index index tree.
424   ReadUInt64(); // Possibly flags -- sometimes 1, sometimes 0.
425   ReadUInt64(); // Number of directory index entries (same as number of AOLL
426                // chunks in main directory)
427 
428   // (The obvious guess for the following two fields, which recur in a number
429   // of places, is they are maximum sizes for the directory and directory index.
430   // However, I have seen no direct evidence that this is the case.)
431 
432   ReadUInt32(); // $100000 (Same as field following chunk size in directory)
433   ReadUInt32(); // $20000 (Same as field following chunk size in directory index)
434 
435   ReadUInt64(); // 0 (unknown)
436   if (ReadUInt32() != kSignature_CAOL)
437     return S_FALSE;
438   if (ReadUInt32() != 2) // (Most likely a version number)
439     return S_FALSE;
440   UInt32 caolLength = ReadUInt32(); // $50 (Len of the CAOL section, which includes the ITSF section)
441   if (caolLength >= 0x2C)
442   {
443     /* UInt32 c7 = */ ReadUInt16(); // Unknown.  Remains the same when identical files are built.
444               // Does not appear to be a checksum.  Many files have
445               // 'HH' (HTML Help?) here, indicating this may be a compiler ID
446               //  field.  But at least one ITOL/ITLS compiler does not set this
447               // field to a constant value.
448     ReadUInt16(); // 0 (Unknown.  Possibly part of 00A4 field)
449     ReadUInt32(); // Unknown.  Two values have been seen -- $43ED, and 0.
450     ReadUInt32(); // $2000 (Directory chunk size of directory)
451     ReadUInt32(); // $200 (Directory chunk size of directory index)
452     ReadUInt32(); // $100000 (Same as field following chunk size in directory)
453     ReadUInt32(); // $20000 (Same as field following chunk size in directory index)
454     ReadUInt32(); // 0 (unknown)
455     ReadUInt32(); // 0 (Unknown)
456     if (caolLength == 0x2C)
457     {
458       // fprintf(stdout, "\n !!!NewFormat\n");
459       // fflush(stdout);
460       database.ContentOffset = 0; // maybe we must add database.StartPosition here?
461       database.NewFormat = true;
462     }
463     else if (caolLength == 0x50)
464     {
465       ReadUInt32(); // 0 (Unknown)
466       if (ReadUInt32() != kSignature_ITSF)
467         return S_FALSE;
468       if (ReadUInt32() != 4) // $4 (Version number -- CHM uses 3)
469         return S_FALSE;
470       if (ReadUInt32() != 0x20) // $20 (length of ITSF)
471         return S_FALSE;
472       UInt32 unknown = ReadUInt32();
473       if (unknown != 0 && unknown != 1) // = 0 for some HxW files, 1 in other cases;
474         return S_FALSE;
475       database.ContentOffset = database.StartPosition + ReadUInt64();
476       /* UInt32 timeStamp = */ ReadUInt32();
477           // A timestamp of some sort.
478           // Considered as a big-endian DWORD, it appears to contain
479           // seconds (MSB) and fractional seconds (second byte).
480           // The third and fourth bytes may contain even more fractional
481           // bits.  The 4 least significant bits in the last byte are constant.
482       /* UInt32 lang = */ ReadUInt32(); // BE?
483     }
484     else
485       return S_FALSE;
486   }
487 
488   // Section 0
489   ReadChunk(inStream, database.StartPosition + sectionOffsets[0], sectionSizes[0]);
490   if (sectionSizes[0] < 0x18)
491     return S_FALSE;
492   if (ReadUInt32() != 0x01FE)
493     return S_FALSE;
494   ReadUInt32(); // unknown:  0
495   UInt64 fileSize = ReadUInt64();
496   database.UpdatePhySize(fileSize);
497   ReadUInt32(); // unknown:  0
498   ReadUInt32(); // unknown:  0
499 
500   // Section 1: The Directory Listing
501   ReadChunk(inStream, database.StartPosition + sectionOffsets[1], sectionSizes[1]);
502   if (ReadUInt32() != kSignature_IFCM)
503     return S_FALSE;
504   if (ReadUInt32() != 1) // (probably a version number)
505     return S_FALSE;
506   UInt32 dirChunkSize = ReadUInt32(); // $2000
507   if (dirChunkSize < 64)
508     return S_FALSE;
509   ReadUInt32(); // $100000  (unknown)
510   ReadUInt32(); // -1 (unknown)
511   ReadUInt32(); // -1 (unknown)
512   UInt32 numDirChunks = ReadUInt32();
513   ReadUInt32(); // 0 (unknown, probably high word of above)
514 
515   for (UInt32 ci = 0; ci < numDirChunks; ci++)
516   {
517     UInt64 chunkPos = _inBuffer.GetProcessedSize();
518     if (ReadUInt32() == kSignature_AOLL)
519     {
520       UInt32 quickrefLength = ReadUInt32(); // Len of quickref area at end of directory chunk
521       if (quickrefLength > dirChunkSize || quickrefLength < 2)
522         return S_FALSE;
523       ReadUInt64(); // Directory chunk number
524             // This must match physical position in file, that is
525             // the chunk size times the chunk number must be the
526             // offset from the end of the directory header.
527       ReadUInt64(); // Chunk number of previous listing chunk when reading
528                     // directory in sequence (-1 if first listing chunk)
529       ReadUInt64(); // Chunk number of next listing chunk when reading
530                     // directory in sequence (-1 if last listing chunk)
531       ReadUInt64(); // Number of first listing entry in this chunk
532       ReadUInt32(); // 1 (unknown -- other values have also been seen here)
533       ReadUInt32(); // 0 (unknown)
534 
535       unsigned numItems = 0;
536       for (;;)
537       {
538         const UInt64 offset = _inBuffer.GetProcessedSize() - chunkPos;
539         const UInt32 offsetLimit = dirChunkSize - quickrefLength;
540         if (offset > offsetLimit)
541           return S_FALSE;
542         if (offset == offsetLimit)
543           break;
544         if (database.NewFormat)
545         {
546           const unsigned nameLen = ReadUInt16();
547           if (nameLen == 0)
548             return S_FALSE;
549           UString name;
550           ReadUString(nameLen, name);
551           AString s;
552           ConvertUnicodeToUTF8(name, s);
553           {
554             const unsigned b = ReadByte();
555             s.Add_Space();
556             PrintByte(b, s);
557           }
558           s.Add_Space();
559           UInt64 len = ReadEncInt();
560           // then number of items ?
561           // then length ?
562           // then some data (binary encoding?)
563           if (len > 1u << 29) // what limit here we need?
564             return S_FALSE;
565           if (len)
566           do
567           {
568             const unsigned b = ReadByte();
569             PrintByte(b, s);
570           }
571           while (--len);
572           database.NewFormatString += s;
573           database.NewFormatString += "\r\n";
574         }
575         else
576         {
577           RINOK(ReadDirEntry(database))
578         }
579         numItems++;
580       }
581       Skip(quickrefLength - 2);
582       if (ReadUInt16() != numItems)
583         return S_FALSE;
584       if (numItems > numDirEntries)
585         return S_FALSE;
586       numDirEntries -= numItems;
587     }
588     else
589       Skip(dirChunkSize - 4);
590   }
591   return numDirEntries == 0 ? S_OK : S_FALSE;
592 }
593 
DecompressStream(IInStream * inStream,const CDatabase & database,const AString & name)594 HRESULT CInArchive::DecompressStream(IInStream *inStream, const CDatabase &database, const AString &name)
595 {
596   int index = database.FindItem(name);
597   if (index < 0)
598     return S_FALSE;
599   const CItem &item = database.Items[index];
600   _chunkSize = item.Size;
601   return ReadChunk(inStream, database.ContentOffset + item.Offset, item.Size);
602 }
603 
604 
// Names (and name fragments) of the internal items that OpenHighLevel()
// looks up in the directory. Adjacent string literals are concatenated, so
// kNameList is "::DataSpace/NameList" and kStorage is "::DataSpace/Storage/".
#define DATA_SPACE "::DataSpace/"
#define kNameList DATA_SPACE "NameList"
#define kStorage DATA_SPACE "Storage/"
// The following are appended to a section prefix (see GetSectionPrefix).
#define kContent "Content"
#define kControlData "ControlData"
#define kSpanInfo "SpanInfo"
#define kTransform "Transform/"
// Appended after the transform prefix and the method GUID string.
#define kResetTable "/InstanceData/ResetTable"
// Appended after the transform prefix (Help2 only).
#define kTransformList "List"
614 
// Returns kStorage + name + a trailing slash: the name prefix shared by the
// internal items of the section called `name`.
static AString GetSectionPrefix(const AString &name)
{
  AString prefix (kStorage);
  prefix += name;
  prefix.Add_Slash();
  return prefix;
}
622 
623 #define RINOZ(x) { int _tt_ = (x); if (_tt_ != 0) return _tt_; }
624 
CompareFiles(const unsigned * p1,const unsigned * p2,void * param)625 static int CompareFiles(const unsigned *p1, const unsigned *p2, void *param)
626 {
627   const CObjectVector<CItem> &items = *(const CObjectVector<CItem> *)param;
628   const CItem &item1 = items[*p1];
629   const CItem &item2 = items[*p2];
630   bool isDir1 = item1.IsDir();
631   bool isDir2 = item2.IsDir();
632   if (isDir1 && !isDir2)
633     return -1;
634   if (isDir2)
635   {
636     if (!isDir1)
637       return 1;
638   }
639   else
640   {
641     RINOZ(MyCompare(item1.Section, item2.Section))
642     RINOZ(MyCompare(item1.Offset, item2.Offset))
643     RINOZ(MyCompare(item1.Size, item2.Size))
644   }
645   return MyCompare(*p1, *p2);
646 }
647 
SetIndices()648 void CFilesDatabase::SetIndices()
649 {
650   FOR_VECTOR (i, Items)
651   {
652     const CItem &item = Items[i];
653     if (item.IsUserItem() && item.Name.Len() != 1)
654       Indices.Add(i);
655   }
656 }
657 
Sort()658 void CFilesDatabase::Sort()
659 {
660   Indices.Sort(CompareFiles, (void *)&Items);
661 }
662 
Check()663 bool CFilesDatabase::Check()
664 {
665   UInt64 maxPos = 0;
666   UInt64 prevSection = 0;
667   FOR_VECTOR (i, Indices)
668   {
669     const CItem &item = Items[Indices[i]];
670     if (item.Section == 0 || item.IsDir())
671       continue;
672     if (item.Section != prevSection)
673     {
674       prevSection = item.Section;
675       maxPos = 0;
676       continue;
677     }
678     if (item.Offset < maxPos)
679       return false;
680     maxPos = item.Offset + item.Size;
681     if (maxPos < item.Offset)
682       return false;
683   }
684   return true;
685 }
686 
CheckSectionRefs()687 bool CFilesDatabase::CheckSectionRefs()
688 {
689   FOR_VECTOR (i, Indices)
690   {
691     const CItem &item = Items[Indices[i]];
692     if (item.Section == 0 || item.IsDir())
693       continue;
694     if (item.Section >= Sections.Size())
695       return false;
696   }
697   return true;
698 }
699 
// Returns i when num == (1 << i) for some i in 0..31, otherwise -1
// (so 0 and every value that is not a power of two give -1).
static inline int GetLog(UInt32 num)
{
  int bit = 0;
  // mask walks through 1, 2, 4, ... 2^31 and becomes 0 after the last shift
  for (UInt32 mask = 1; mask != 0; mask <<= 1, bit++)
    if (mask == num)
      return bit;
  return -1;
}
707 
OpenHighLevel(IInStream * inStream,CFilesDatabase & database)708 HRESULT CInArchive::OpenHighLevel(IInStream *inStream, CFilesDatabase &database)
709 {
710   {
711     // The NameList file
712     RINOK(DecompressStream(inStream, database, (AString)kNameList))
713     /* UInt16 length = */ ReadUInt16();
714     UInt16 numSections = ReadUInt16();
715     for (unsigned i = 0; i < numSections; i++)
716     {
717       CSectionInfo section;
718       const unsigned nameLen = ReadUInt16();
719       UString name;
720       ReadUString(nameLen, name);
721       if (ReadUInt16() != 0)
722         return S_FALSE;
723       ConvertUnicodeToUTF8(name, section.Name);
724       // if (!ConvertUnicodeToUTF8(name, section.Name)) return S_FALSE;
725       database.Sections.Add(section);
726     }
727   }
728 
729   unsigned si;
730   for (si = 1; si < database.Sections.Size(); si++)
731   {
732     CSectionInfo &section = database.Sections[si];
733     AString sectionPrefix (GetSectionPrefix(section.Name));
734     {
735       // Content
736       int index = database.FindItem(sectionPrefix + kContent);
737       if (index < 0)
738         return S_FALSE;
739       const CItem &item = database.Items[index];
740       section.Offset = item.Offset;
741       section.CompressedSize = item.Size;
742     }
743     AString transformPrefix (sectionPrefix + kTransform);
744     if (database.Help2Format)
745     {
746       // Transform List
747       RINOK(DecompressStream(inStream, database, transformPrefix + kTransformList))
748       if ((_chunkSize & 0xF) != 0)
749         return S_FALSE;
750       unsigned numGuids = (unsigned)(_chunkSize / 0x10);
751       if (numGuids < 1)
752         return S_FALSE;
753       for (unsigned i = 0; i < numGuids; i++)
754       {
755         CMethodInfo method;
756         ReadGUID(method.Guid);
757         section.Methods.Add(method);
758       }
759     }
760     else
761     {
762       CMethodInfo method;
763       memcpy(method.Guid, kChmLzxGuid, 16);
764       section.Methods.Add(method);
765     }
766 
767     {
768       // Control Data
769       RINOK(DecompressStream(inStream, database, sectionPrefix + kControlData))
770 
771       FOR_VECTOR (mi, section.Methods)
772       {
773         CMethodInfo &method = section.Methods[mi];
774         UInt32 numDWORDS = ReadUInt32();
775         if (method.IsLzx())
776         {
777           if (numDWORDS < 5)
778             return S_FALSE;
779           if (ReadUInt32() != kSignature_LZXC)
780             return S_FALSE;
781           CLzxInfo &li = method.LzxInfo;
782           li.Version = ReadUInt32();
783           if (li.Version != 2 && li.Version != 3)
784             return S_FALSE;
785 
786           {
787             // There is bug in VC6, if we use function call as parameter for inline function
788             const UInt32 val32 = ReadUInt32();
789             const int n = GetLog(val32);
790             if (n < 0 || n > 16)
791               return S_FALSE;
792             li.ResetIntervalBits = (unsigned)n;
793           }
794 
795           {
796             const UInt32 val32 = ReadUInt32();
797             const int n = GetLog(val32);
798             if (n < 0 || n > 16)
799               return S_FALSE;
800             li.WindowSizeBits = (unsigned)n;
801           }
802 
803           li.CacheSize = ReadUInt32();
804           numDWORDS -= 5;
805           if (numDWORDS)
806           do
807             ReadUInt32();
808           while (--numDWORDS);
809         }
810         else
811         {
812           if (numDWORDS > 1u << 27)
813             return S_FALSE;
814           const size_t numBytes = (size_t)numDWORDS * 4;
815           // method.ControlData.Alloc(numBytes);
816           // ReadBytes(method.ControlData, numBytes);
817           Skip(numBytes);
818         }
819       }
820     }
821 
822     {
823       // SpanInfo
824       RINOK(DecompressStream(inStream, database, sectionPrefix + kSpanInfo))
825       section.UncompressedSize = ReadUInt64();
826     }
827 
828     // read ResetTable for LZX
829     FOR_VECTOR (mi, section.Methods)
830     {
831       CMethodInfo &method = section.Methods[mi];
832       if (method.IsLzx())
833       {
834         // ResetTable;
835         RINOK(DecompressStream(inStream, database, transformPrefix +
836             method.GetGuidString() + kResetTable))
837         CResetTable &rt = method.LzxInfo.ResetTable;
838 
839         if (_chunkSize < 4)
840         {
841           if (_chunkSize != 0)
842             return S_FALSE;
843           // ResetTable is empty in .chw files
844           if (section.UncompressedSize != 0)
845             return S_FALSE;
846           rt.UncompressedSize = 0;
847           rt.CompressedSize = 0;
848           // rt.BlockSize = 0;
849         }
850         else
851         {
852           const UInt32 ver = ReadUInt32(); // 2  unknown (possibly a version number)
853           if (ver != 2 && ver != 3)
854             return S_FALSE;
855           const UInt32 numEntries = ReadUInt32();
856           const unsigned kEntrySize = 8;
857           if (ReadUInt32() != kEntrySize)
858             return S_FALSE;
859           const unsigned kRtHeaderSize = 4 * 4 + 8 * 3;
860           if (ReadUInt32() != kRtHeaderSize)
861             return S_FALSE;
862           if (kRtHeaderSize + kEntrySize * (UInt64)numEntries != _chunkSize)
863             return S_FALSE;
864 
865           rt.UncompressedSize = ReadUInt64();
866           rt.CompressedSize = ReadUInt64();
867           UInt64 blockSize = ReadUInt64();
868           if (blockSize != kBlockSize)
869             return S_FALSE;
870           UInt64 numBlocks = (rt.UncompressedSize + kBlockSize + 1) / kBlockSize;
871           if (numEntries != numBlocks &&
872               numEntries != numBlocks + 1)
873             return S_FALSE;
874 
875           rt.ResetOffsets.ClearAndReserve(numEntries);
876 
877           for (UInt32 i = 0; i < numEntries; i++)
878           {
879             UInt64 v = ReadUInt64();
880             if (i != 0 && v < rt.ResetOffsets[i - 1])
881               return S_FALSE;
882             rt.ResetOffsets.AddInReserved(v);
883           }
884 
885           if (numEntries != 0)
886             if (rt.ResetOffsets[0] != 0)
887               return S_FALSE;
888 
889           if (numEntries == numBlocks + 1)
890           {
891             // Lazarus 9-26-2 chm contains additional entty
892             if (rt.ResetOffsets.Back() != rt.CompressedSize)
893               return S_FALSE;
894           }
895         }
896       }
897     }
898   }
899 
900   database.SetIndices();
901   database.Sort();
902   return database.Check() ? S_OK : S_FALSE;
903 }
904 
Open2(IInStream * inStream,const UInt64 * searchHeaderSizeLimit,CFilesDatabase & database)905 HRESULT CInArchive::Open2(IInStream *inStream,
906     const UInt64 *searchHeaderSizeLimit,
907     CFilesDatabase &database)
908 {
909   IsArc = false;
910   HeadersError = false;
911   UnexpectedEnd = false;
912   UnsupportedFeature = false;
913 
914   database.Clear();
915   database.Help2Format = _help2;
916   const UInt32 chmVersion = 3;
917 
918   RINOK(InStream_GetPos(inStream, database.StartPosition))
919 
920   if (!_inBuffer.Create(1 << 14))
921     return E_OUTOFMEMORY;
922   _inBuffer.SetStream(inStream);
923   _inBuffer.Init();
924 
925   if (_help2)
926   {
927     const unsigned kSignatureSize = 8;
928     const UInt64 signature = ((UInt64)kSignature_ITLS << 32) | kSignature_ITOL;
929     UInt64 limit = 1 << 18;
930 
931     if (searchHeaderSizeLimit)
932       if (limit > *searchHeaderSizeLimit)
933         limit = *searchHeaderSizeLimit;
934 
935     UInt64 val = 0;
936 
937     for (;;)
938     {
939       Byte b;
940       if (!_inBuffer.ReadByte(b))
941         return S_FALSE;
942       val >>= 8;
943       val |= ((UInt64)b) << ((kSignatureSize - 1) * 8);
944       if (_inBuffer.GetProcessedSize() >= kSignatureSize)
945       {
946         if (val == signature)
947           break;
948         if (_inBuffer.GetProcessedSize() > limit)
949           return S_FALSE;
950       }
951     }
952 
953     database.StartPosition += _inBuffer.GetProcessedSize() - kSignatureSize;
954     RINOK(OpenHelp2(inStream, database))
955     if (database.NewFormat)
956       return S_OK;
957   }
958   else
959   {
960     if (ReadUInt32() != kSignature_ITSF)
961       return S_FALSE;
962     if (ReadUInt32() != chmVersion)
963       return S_FALSE;
964     RINOK(OpenChm(inStream, database))
965   }
966 
967 
968   #ifndef CHM_LOW
969 
970   try
971   {
972     try
973     {
974       HRESULT res = OpenHighLevel(inStream, database);
975       if (res == S_FALSE)
976       {
977         UnsupportedFeature = true;
978         database.HighLevelClear();
979         return S_OK;
980       }
981       RINOK(res)
982       if (!database.CheckSectionRefs())
983         HeadersError = true;
984       database.LowLevel = false;
985     }
986     catch(...)
987     {
988       database.HighLevelClear();
989       throw;
990     }
991   }
992   // catch(const CInBufferException &e) { return e.ErrorCode; }
993   catch(CEnexpectedEndException &) { UnexpectedEnd = true; }
994   catch(CHeaderErrorException &) { HeadersError = true; }
995   catch(...) { throw; }
996 
997   #endif
998 
999   return S_OK;
1000 }
1001 
Open(IInStream * inStream,const UInt64 * searchHeaderSizeLimit,CFilesDatabase & database)1002 HRESULT CInArchive::Open(IInStream *inStream,
1003     const UInt64 *searchHeaderSizeLimit,
1004     CFilesDatabase &database)
1005 {
1006   try
1007   {
1008     try
1009     {
1010       HRESULT res = Open2(inStream, searchHeaderSizeLimit, database);
1011       m_InStreamRef.Release();
1012       return res;
1013     }
1014     catch(...)
1015     {
1016       m_InStreamRef.Release();
1017       throw;
1018     }
1019   }
1020   catch(const CInBufferException &e) { return e.ErrorCode; }
1021   catch(CEnexpectedEndException &) { UnexpectedEnd = true; }
1022   catch(CHeaderErrorException &) { HeadersError = true; }
1023   return S_FALSE;
1024 }
1025 
1026 }}
1027