1 // Archive/ChmIn.cpp
2
3 #include "StdAfx.h"
4
5 // #include <stdio.h>
6
7 #include "../../../../C/CpuArch.h"
8
9 #include "../../../Common/IntToString.h"
10 #include "../../../Common/UTFConvert.h"
11
12 #include "../../Common/LimitedStreams.h"
13 #include "../../Common/StreamUtils.h"
14
15 #include "ChmIn.h"
16
// Short aliases used by ReadUInt32() / ReadUInt64() below to decode a raw
// byte buffer into a 32-bit / 64-bit integer via GetUi32 / GetUi64.
#define Get32(p) GetUi32(p)
#define Get64(p) GetUi64(p)
19
20 namespace NArchive {
21 namespace NChm {
22
23 static const UInt32 kSignature_ITSP = 0x50535449;
24 static const UInt32 kSignature_PMGL = 0x4C474D50;
25 static const UInt32 kSignature_LZXC = 0x43585A4C;
26
27 static const UInt32 kSignature_IFCM = 0x4D434649;
28 static const UInt32 kSignature_AOLL = 0x4C4C4F41;
29 static const UInt32 kSignature_CAOL = 0x4C4F4143;
30
31 static const UInt32 kSignature_ITSF = 0x46535449;
32 static const UInt32 kSignature_ITOL = 0x4C4F5449;
33 static const UInt32 kSignature_ITLS = 0x534C5449;
34
35 struct CEnexpectedEndException {};
36 struct CHeaderErrorException {};
37
38 // define CHM_LOW, if you want to see low level items
39 // #define CHM_LOW
40
41 static const Byte kChmLzxGuid[16] = { 0x40, 0x89, 0xC2, 0x7F, 0x31, 0x9D, 0xD0, 0x11, 0x9B, 0x27, 0x00, 0xA0, 0xC9, 0x1E, 0x9C, 0x7C };
42 static const Byte kHelp2LzxGuid[16] = { 0xC6, 0x07, 0x90, 0x0A, 0x76, 0x40, 0xD3, 0x11, 0x87, 0x89, 0x00, 0x00, 0xF8, 0x10, 0x57, 0x54 };
43 static const Byte kDesGuid[16] = { 0xA2, 0xE4, 0xF6, 0x67, 0xBF, 0x60, 0xD3, 0x11, 0x85, 0x40, 0x00, 0xC0, 0x4F, 0x58, 0xC3, 0xCF };
44
// Returns true when the two 16-byte GUID buffers contain identical bytes.
static inline bool AreGuidsEqual(const Byte *g1, const Byte *g2)
{
  const size_t kGuidSize = 16;
  return !memcmp(g1, g2, kGuidSize);
}
49
PrintByte(unsigned b,AString & s)50 static void PrintByte(unsigned b, AString &s)
51 {
52 s += (char)GET_HEX_CHAR_UPPER(b >> 4);
53 s += (char)GET_HEX_CHAR_LOWER(b & 0xF);
54 }
55
GetGuidString() const56 AString CMethodInfo::GetGuidString() const
57 {
58 char s[16 * 2 + 8];
59 RawLeGuidToString_Braced(Guid, s);
60 // MyStringUpper_Ascii(s);
61 return (AString)s;
62 }
63
IsLzx() const64 bool CMethodInfo::IsLzx() const
65 {
66 if (AreGuidsEqual(Guid, kChmLzxGuid))
67 return true;
68 return AreGuidsEqual(Guid, kHelp2LzxGuid);
69 }
70
IsDes() const71 bool CMethodInfo::IsDes() const
72 {
73 return AreGuidsEqual(Guid, kDesGuid);
74 }
75
GetName() const76 AString CMethodInfo::GetName() const
77 {
78 AString s;
79 if (IsLzx())
80 {
81 s = "LZX:";
82 s.Add_UInt32(LzxInfo.GetNumDictBits());
83 }
84 else
85 {
86 if (IsDes())
87 s = "DES";
88 else
89 {
90 s = GetGuidString();
91 /*
92 if (ControlData.Size() > 0 && ControlData.Size() <= (1 << 6))
93 {
94 s.Add_Colon();
95 for (size_t i = 0; i < ControlData.Size(); i++)
96 PrintByte(ControlData[i], s);
97 }
98 */
99 }
100 }
101 return s;
102 }
103
IsLzx() const104 bool CSectionInfo::IsLzx() const
105 {
106 if (Methods.Size() != 1)
107 return false;
108 return Methods[0].IsLzx();
109 }
110
GetMethodName() const111 UString CSectionInfo::GetMethodName() const
112 {
113 UString s;
114 if (!IsLzx())
115 {
116 UString temp;
117 ConvertUTF8ToUnicode(Name, temp);
118 s += temp;
119 s += ": ";
120 }
121 FOR_VECTOR (i, Methods)
122 {
123 if (i != 0)
124 s.Add_Space();
125 s += Methods[i].GetName();
126 }
127 return s;
128 }
129
ReadByte()130 Byte CInArchive::ReadByte()
131 {
132 Byte b;
133 if (!_inBuffer.ReadByte(b))
134 throw CEnexpectedEndException();
135 return b;
136 }
137
Skip(size_t size)138 void CInArchive::Skip(size_t size)
139 {
140 if (_inBuffer.Skip(size) != size)
141 throw CEnexpectedEndException();
142 }
143
ReadBytes(Byte * data,UInt32 size)144 void CInArchive::ReadBytes(Byte *data, UInt32 size)
145 {
146 if (_inBuffer.ReadBytes(data, size) != size)
147 throw CEnexpectedEndException();
148 }
149
ReadUInt16()150 UInt16 CInArchive::ReadUInt16()
151 {
152 Byte b0, b1;
153 if (!_inBuffer.ReadByte(b0)) throw CEnexpectedEndException();
154 if (!_inBuffer.ReadByte(b1)) throw CEnexpectedEndException();
155 return (UInt16)(((UInt16)b1 << 8) | b0);
156 }
157
ReadUInt32()158 UInt32 CInArchive::ReadUInt32()
159 {
160 Byte p[4];
161 ReadBytes(p, 4);
162 return Get32(p);
163 }
164
ReadUInt64()165 UInt64 CInArchive::ReadUInt64()
166 {
167 Byte p[8];
168 ReadBytes(p, 8);
169 return Get64(p);
170 }
171
ReadEncInt()172 UInt64 CInArchive::ReadEncInt()
173 {
174 UInt64 val = 0;
175 for (int i = 0; i < 9; i++)
176 {
177 const unsigned b = ReadByte();
178 val |= (b & 0x7F);
179 if (b < 0x80)
180 return val;
181 val <<= 7;
182 }
183 throw CHeaderErrorException();
184 }
185
ReadGUID(Byte * g)186 void CInArchive::ReadGUID(Byte *g)
187 {
188 ReadBytes(g, 16);
189 }
190
ReadString(unsigned size,AString & s)191 void CInArchive::ReadString(unsigned size, AString &s)
192 {
193 s.Empty();
194 if (size != 0)
195 {
196 ReadBytes((Byte *)s.GetBuf(size), size);
197 s.ReleaseBuf_CalcLen(size);
198 }
199 }
200
ReadUString(unsigned size,UString & s)201 void CInArchive::ReadUString(unsigned size, UString &s)
202 {
203 s.Empty();
204 while (size-- != 0)
205 {
206 const wchar_t c = ReadUInt16();
207 if (c == 0)
208 {
209 Skip(2 * size);
210 return;
211 }
212 s += c;
213 }
214 }
215
ReadChunk(IInStream * inStream,UInt64 pos,UInt64 size)216 HRESULT CInArchive::ReadChunk(IInStream *inStream, UInt64 pos, UInt64 size)
217 {
218 RINOK(InStream_SeekSet(inStream, pos))
219 CLimitedSequentialInStream *streamSpec = new CLimitedSequentialInStream;
220 CMyComPtr<ISequentialInStream> limitedStream(streamSpec);
221 streamSpec->SetStream(inStream);
222 streamSpec->Init(size);
223 m_InStreamRef = limitedStream;
224 _inBuffer.SetStream(limitedStream);
225 _inBuffer.Init();
226 return S_OK;
227 }
228
ReadDirEntry(CDatabase & database)229 HRESULT CInArchive::ReadDirEntry(CDatabase &database)
230 {
231 CItem item;
232 const UInt64 nameLen = ReadEncInt();
233 if (nameLen == 0 || nameLen > (1 << 13))
234 return S_FALSE;
235 ReadString((unsigned)nameLen, item.Name);
236 item.Section = ReadEncInt();
237 item.Offset = ReadEncInt();
238 item.Size = ReadEncInt();
239 database.Items.Add(item);
240 return S_OK;
241 }
242
OpenChm(IInStream * inStream,CDatabase & database)243 HRESULT CInArchive::OpenChm(IInStream *inStream, CDatabase &database)
244 {
245 UInt32 headerSize = ReadUInt32();
246 if (headerSize != 0x60)
247 return S_FALSE;
248 database.PhySize = headerSize;
249
250 UInt32 unknown1 = ReadUInt32();
251 if (unknown1 != 0 && unknown1 != 1) // it's 0 in one .sll file
252 return S_FALSE;
253
254 IsArc = true;
255
256 /* UInt32 timeStamp = */ ReadUInt32();
257 // Considered as a big-endian DWORD, it appears to contain seconds (MSB) and
258 // fractional seconds (second byte).
259 // The third and fourth bytes may contain even more fractional bits.
260 // The 4 least significant bits in the last byte are constant.
261 /* UInt32 lang = */ ReadUInt32();
262 Byte g[16];
263 ReadGUID(g); // {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC}
264 ReadGUID(g); // {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC}
265 const unsigned kNumSections = 2;
266 UInt64 sectionOffsets[kNumSections];
267 UInt64 sectionSizes[kNumSections];
268 unsigned i;
269 for (i = 0; i < kNumSections; i++)
270 {
271 sectionOffsets[i] = ReadUInt64();
272 sectionSizes[i] = ReadUInt64();
273 UInt64 end = sectionOffsets[i] + sectionSizes[i];
274 database.UpdatePhySize(end);
275 }
276 // if (chmVersion == 3)
277 database.ContentOffset = ReadUInt64();
278 /*
279 else
280 database.ContentOffset = database.StartPosition + 0x58
281 */
282
283 // Section 0
284 ReadChunk(inStream, sectionOffsets[0], sectionSizes[0]);
285 if (sectionSizes[0] < 0x18)
286 return S_FALSE;
287 if (ReadUInt32() != 0x01FE)
288 return S_FALSE;
289 ReadUInt32(); // unknown: 0
290 UInt64 fileSize = ReadUInt64();
291 database.UpdatePhySize(fileSize);
292 ReadUInt32(); // unknown: 0
293 ReadUInt32(); // unknown: 0
294
295 // Section 1: The Directory Listing
296 ReadChunk(inStream, sectionOffsets[1], sectionSizes[1]);
297 if (ReadUInt32() != kSignature_ITSP)
298 return S_FALSE;
299 if (ReadUInt32() != 1) // version
300 return S_FALSE;
301 /* UInt32 dirHeaderSize = */ ReadUInt32();
302 ReadUInt32(); // 0x0A (unknown)
303 UInt32 dirChunkSize = ReadUInt32(); // $1000
304 if (dirChunkSize < 32)
305 return S_FALSE;
306 /* UInt32 density = */ ReadUInt32(); // "Density" of quickref section, usually 2.
307 /* UInt32 depth = */ ReadUInt32(); // Depth of the index tree: 1 there is no index,
308 // 2 if there is one level of PMGI chunks.
309
310 /* UInt32 chunkNumber = */ ReadUInt32(); // Chunk number of root index chunk, -1 if there is none
311 // (though at least one file has 0 despite there being no
312 // index chunk, probably a bug.)
313 /* UInt32 firstPmglChunkNumber = */ ReadUInt32(); // Chunk number of first PMGL (listing) chunk
314 /* UInt32 lastPmglChunkNumber = */ ReadUInt32(); // Chunk number of last PMGL (listing) chunk
315 ReadUInt32(); // -1 (unknown)
316 UInt32 numDirChunks = ReadUInt32(); // Number of directory chunks (total)
317 /* UInt32 windowsLangId = */ ReadUInt32();
318 ReadGUID(g); // {5D02926A-212E-11D0-9DF9-00A0C922E6EC}
319 ReadUInt32(); // 0x54 (This is the length again)
320 ReadUInt32(); // -1 (unknown)
321 ReadUInt32(); // -1 (unknown)
322 ReadUInt32(); // -1 (unknown)
323
324 for (UInt32 ci = 0; ci < numDirChunks; ci++)
325 {
326 UInt64 chunkPos = _inBuffer.GetProcessedSize();
327 if (ReadUInt32() == kSignature_PMGL)
328 {
329 // The quickref area is written backwards from the end of the chunk.
330 // One quickref entry exists for every n entries in the file, where n
331 // is calculated as 1 + (1 << quickref density). So for density = 2, n = 5.
332
333 const UInt32 quickrefLength = ReadUInt32(); // Len of free space and/or quickref area at end of directory chunk
334 if (quickrefLength > dirChunkSize || quickrefLength < 2)
335 return S_FALSE;
336 ReadUInt32(); // Always 0
337 ReadUInt32(); // Chunk number of previous listing chunk when reading
338 // directory in sequence (-1 if this is the first listing chunk)
339 ReadUInt32(); // Chunk number of next listing chunk when reading
340 // directory in sequence (-1 if this is the last listing chunk)
341 unsigned numItems = 0;
342
343 for (;;)
344 {
345 const UInt64 offset = _inBuffer.GetProcessedSize() - chunkPos;
346 const UInt32 offsetLimit = dirChunkSize - quickrefLength;
347 if (offset > offsetLimit)
348 return S_FALSE;
349 if (offset == offsetLimit)
350 break;
351 RINOK(ReadDirEntry(database))
352 numItems++;
353 }
354
355 Skip(quickrefLength - 2);
356
357 const unsigned rrr = ReadUInt16();
358 if (rrr != numItems)
359 {
360 // Lazarus 9-26-2 chm contains 0 here.
361 if (rrr != 0)
362 return S_FALSE;
363 }
364 }
365 else
366 Skip(dirChunkSize - 4);
367 }
368 return S_OK;
369 }
370
OpenHelp2(IInStream * inStream,CDatabase & database)371 HRESULT CInArchive::OpenHelp2(IInStream *inStream, CDatabase &database)
372 {
373 if (ReadUInt32() != 1) // version
374 return S_FALSE;
375 if (ReadUInt32() != 0x28) // Location of header section table
376 return S_FALSE;
377 UInt32 numHeaderSections = ReadUInt32();
378 const unsigned kNumHeaderSectionsMax = 5;
379 if (numHeaderSections != kNumHeaderSectionsMax)
380 return S_FALSE;
381
382 IsArc = true;
383
384 ReadUInt32(); // Len of post-header table
385 Byte g[16];
386 ReadGUID(g); // {0A9007C1-4076-11D3-8789-0000F8105754}
387
388 // header section table
389 UInt64 sectionOffsets[kNumHeaderSectionsMax];
390 UInt64 sectionSizes[kNumHeaderSectionsMax];
391 UInt32 i;
392 for (i = 0; i < numHeaderSections; i++)
393 {
394 sectionOffsets[i] = ReadUInt64();
395 sectionSizes[i] = ReadUInt64();
396 UInt64 end = sectionOffsets[i] + sectionSizes[i];
397 database.UpdatePhySize(end);
398 }
399
400 // Post-Header
401 ReadUInt32(); // 2
402 ReadUInt32(); // 0x98: offset to CAOL from beginning of post-header)
403 // ----- Directory information
404 ReadUInt64(); // Chunk number of top-level AOLI chunk in directory, or -1
405 ReadUInt64(); // Chunk number of first AOLL chunk in directory
406 ReadUInt64(); // Chunk number of last AOLL chunk in directory
407 ReadUInt64(); // 0 (unknown)
408 ReadUInt32(); // $2000 (Directory chunk size of directory)
409 ReadUInt32(); // Quickref density for main directory, usually 2
410 ReadUInt32(); // 0 (unknown)
411 ReadUInt32(); // Depth of main directory index tree
412 // 1 there is no index, 2 if there is one level of AOLI chunks.
413 ReadUInt64(); // 0 (unknown)
414 UInt64 numDirEntries = ReadUInt64(); // Number of directory entries
415 // ----- Directory Index Information
416 ReadUInt64(); // -1 (unknown, probably chunk number of top-level AOLI in directory index)
417 ReadUInt64(); // Chunk number of first AOLL chunk in directory index
418 ReadUInt64(); // Chunk number of last AOLL chunk in directory index
419 ReadUInt64(); // 0 (unknown)
420 ReadUInt32(); // $200 (Directory chunk size of directory index)
421 ReadUInt32(); // Quickref density for directory index, usually 2
422 ReadUInt32(); // 0 (unknown)
423 ReadUInt32(); // Depth of directory index index tree.
424 ReadUInt64(); // Possibly flags -- sometimes 1, sometimes 0.
425 ReadUInt64(); // Number of directory index entries (same as number of AOLL
426 // chunks in main directory)
427
428 // (The obvious guess for the following two fields, which recur in a number
429 // of places, is they are maximum sizes for the directory and directory index.
430 // However, I have seen no direct evidence that this is the case.)
431
432 ReadUInt32(); // $100000 (Same as field following chunk size in directory)
433 ReadUInt32(); // $20000 (Same as field following chunk size in directory index)
434
435 ReadUInt64(); // 0 (unknown)
436 if (ReadUInt32() != kSignature_CAOL)
437 return S_FALSE;
438 if (ReadUInt32() != 2) // (Most likely a version number)
439 return S_FALSE;
440 UInt32 caolLength = ReadUInt32(); // $50 (Len of the CAOL section, which includes the ITSF section)
441 if (caolLength >= 0x2C)
442 {
443 /* UInt32 c7 = */ ReadUInt16(); // Unknown. Remains the same when identical files are built.
444 // Does not appear to be a checksum. Many files have
445 // 'HH' (HTML Help?) here, indicating this may be a compiler ID
446 // field. But at least one ITOL/ITLS compiler does not set this
447 // field to a constant value.
448 ReadUInt16(); // 0 (Unknown. Possibly part of 00A4 field)
449 ReadUInt32(); // Unknown. Two values have been seen -- $43ED, and 0.
450 ReadUInt32(); // $2000 (Directory chunk size of directory)
451 ReadUInt32(); // $200 (Directory chunk size of directory index)
452 ReadUInt32(); // $100000 (Same as field following chunk size in directory)
453 ReadUInt32(); // $20000 (Same as field following chunk size in directory index)
454 ReadUInt32(); // 0 (unknown)
455 ReadUInt32(); // 0 (Unknown)
456 if (caolLength == 0x2C)
457 {
458 // fprintf(stdout, "\n !!!NewFormat\n");
459 // fflush(stdout);
460 database.ContentOffset = 0; // maybe we must add database.StartPosition here?
461 database.NewFormat = true;
462 }
463 else if (caolLength == 0x50)
464 {
465 ReadUInt32(); // 0 (Unknown)
466 if (ReadUInt32() != kSignature_ITSF)
467 return S_FALSE;
468 if (ReadUInt32() != 4) // $4 (Version number -- CHM uses 3)
469 return S_FALSE;
470 if (ReadUInt32() != 0x20) // $20 (length of ITSF)
471 return S_FALSE;
472 UInt32 unknown = ReadUInt32();
473 if (unknown != 0 && unknown != 1) // = 0 for some HxW files, 1 in other cases;
474 return S_FALSE;
475 database.ContentOffset = database.StartPosition + ReadUInt64();
476 /* UInt32 timeStamp = */ ReadUInt32();
477 // A timestamp of some sort.
478 // Considered as a big-endian DWORD, it appears to contain
479 // seconds (MSB) and fractional seconds (second byte).
480 // The third and fourth bytes may contain even more fractional
481 // bits. The 4 least significant bits in the last byte are constant.
482 /* UInt32 lang = */ ReadUInt32(); // BE?
483 }
484 else
485 return S_FALSE;
486 }
487
488 // Section 0
489 ReadChunk(inStream, database.StartPosition + sectionOffsets[0], sectionSizes[0]);
490 if (sectionSizes[0] < 0x18)
491 return S_FALSE;
492 if (ReadUInt32() != 0x01FE)
493 return S_FALSE;
494 ReadUInt32(); // unknown: 0
495 UInt64 fileSize = ReadUInt64();
496 database.UpdatePhySize(fileSize);
497 ReadUInt32(); // unknown: 0
498 ReadUInt32(); // unknown: 0
499
500 // Section 1: The Directory Listing
501 ReadChunk(inStream, database.StartPosition + sectionOffsets[1], sectionSizes[1]);
502 if (ReadUInt32() != kSignature_IFCM)
503 return S_FALSE;
504 if (ReadUInt32() != 1) // (probably a version number)
505 return S_FALSE;
506 UInt32 dirChunkSize = ReadUInt32(); // $2000
507 if (dirChunkSize < 64)
508 return S_FALSE;
509 ReadUInt32(); // $100000 (unknown)
510 ReadUInt32(); // -1 (unknown)
511 ReadUInt32(); // -1 (unknown)
512 UInt32 numDirChunks = ReadUInt32();
513 ReadUInt32(); // 0 (unknown, probably high word of above)
514
515 for (UInt32 ci = 0; ci < numDirChunks; ci++)
516 {
517 UInt64 chunkPos = _inBuffer.GetProcessedSize();
518 if (ReadUInt32() == kSignature_AOLL)
519 {
520 UInt32 quickrefLength = ReadUInt32(); // Len of quickref area at end of directory chunk
521 if (quickrefLength > dirChunkSize || quickrefLength < 2)
522 return S_FALSE;
523 ReadUInt64(); // Directory chunk number
524 // This must match physical position in file, that is
525 // the chunk size times the chunk number must be the
526 // offset from the end of the directory header.
527 ReadUInt64(); // Chunk number of previous listing chunk when reading
528 // directory in sequence (-1 if first listing chunk)
529 ReadUInt64(); // Chunk number of next listing chunk when reading
530 // directory in sequence (-1 if last listing chunk)
531 ReadUInt64(); // Number of first listing entry in this chunk
532 ReadUInt32(); // 1 (unknown -- other values have also been seen here)
533 ReadUInt32(); // 0 (unknown)
534
535 unsigned numItems = 0;
536 for (;;)
537 {
538 const UInt64 offset = _inBuffer.GetProcessedSize() - chunkPos;
539 const UInt32 offsetLimit = dirChunkSize - quickrefLength;
540 if (offset > offsetLimit)
541 return S_FALSE;
542 if (offset == offsetLimit)
543 break;
544 if (database.NewFormat)
545 {
546 const unsigned nameLen = ReadUInt16();
547 if (nameLen == 0)
548 return S_FALSE;
549 UString name;
550 ReadUString(nameLen, name);
551 AString s;
552 ConvertUnicodeToUTF8(name, s);
553 {
554 const unsigned b = ReadByte();
555 s.Add_Space();
556 PrintByte(b, s);
557 }
558 s.Add_Space();
559 UInt64 len = ReadEncInt();
560 // then number of items ?
561 // then length ?
562 // then some data (binary encoding?)
563 if (len > 1u << 29) // what limit here we need?
564 return S_FALSE;
565 if (len)
566 do
567 {
568 const unsigned b = ReadByte();
569 PrintByte(b, s);
570 }
571 while (--len);
572 database.NewFormatString += s;
573 database.NewFormatString += "\r\n";
574 }
575 else
576 {
577 RINOK(ReadDirEntry(database))
578 }
579 numItems++;
580 }
581 Skip(quickrefLength - 2);
582 if (ReadUInt16() != numItems)
583 return S_FALSE;
584 if (numItems > numDirEntries)
585 return S_FALSE;
586 numDirEntries -= numItems;
587 }
588 else
589 Skip(dirChunkSize - 4);
590 }
591 return numDirEntries == 0 ? S_OK : S_FALSE;
592 }
593
DecompressStream(IInStream * inStream,const CDatabase & database,const AString & name)594 HRESULT CInArchive::DecompressStream(IInStream *inStream, const CDatabase &database, const AString &name)
595 {
596 int index = database.FindItem(name);
597 if (index < 0)
598 return S_FALSE;
599 const CItem &item = database.Items[index];
600 _chunkSize = item.Size;
601 return ReadChunk(inStream, database.ContentOffset + item.Offset, item.Size);
602 }
603
604
// Names (and name fragments) of the internal items that OpenHighLevel() looks
// up with FindItem(). kNameList and kStorage are built from DATA_SPACE by
// string-literal concatenation; the other fragments are appended to a section
// prefix or a transform prefix at run time.
#define DATA_SPACE "::DataSpace/"
#define kNameList DATA_SPACE "NameList"
#define kStorage DATA_SPACE "Storage/"
#define kContent "Content"
#define kControlData "ControlData"
#define kSpanInfo "SpanInfo"
#define kTransform "Transform/"
#define kResetTable "/InstanceData/ResetTable"
#define kTransformList "List"
614
// Returns kStorage + name, followed by the separator appended by Add_Slash().
static AString GetSectionPrefix(const AString &name)
{
  AString prefix (kStorage);
  prefix += name;
  prefix.Add_Slash();
  return prefix;
}
622
623 #define RINOZ(x) { int _tt_ = (x); if (_tt_ != 0) return _tt_; }
624
CompareFiles(const unsigned * p1,const unsigned * p2,void * param)625 static int CompareFiles(const unsigned *p1, const unsigned *p2, void *param)
626 {
627 const CObjectVector<CItem> &items = *(const CObjectVector<CItem> *)param;
628 const CItem &item1 = items[*p1];
629 const CItem &item2 = items[*p2];
630 bool isDir1 = item1.IsDir();
631 bool isDir2 = item2.IsDir();
632 if (isDir1 && !isDir2)
633 return -1;
634 if (isDir2)
635 {
636 if (!isDir1)
637 return 1;
638 }
639 else
640 {
641 RINOZ(MyCompare(item1.Section, item2.Section))
642 RINOZ(MyCompare(item1.Offset, item2.Offset))
643 RINOZ(MyCompare(item1.Size, item2.Size))
644 }
645 return MyCompare(*p1, *p2);
646 }
647
SetIndices()648 void CFilesDatabase::SetIndices()
649 {
650 FOR_VECTOR (i, Items)
651 {
652 const CItem &item = Items[i];
653 if (item.IsUserItem() && item.Name.Len() != 1)
654 Indices.Add(i);
655 }
656 }
657
Sort()658 void CFilesDatabase::Sort()
659 {
660 Indices.Sort(CompareFiles, (void *)&Items);
661 }
662
Check()663 bool CFilesDatabase::Check()
664 {
665 UInt64 maxPos = 0;
666 UInt64 prevSection = 0;
667 FOR_VECTOR (i, Indices)
668 {
669 const CItem &item = Items[Indices[i]];
670 if (item.Section == 0 || item.IsDir())
671 continue;
672 if (item.Section != prevSection)
673 {
674 prevSection = item.Section;
675 maxPos = 0;
676 continue;
677 }
678 if (item.Offset < maxPos)
679 return false;
680 maxPos = item.Offset + item.Size;
681 if (maxPos < item.Offset)
682 return false;
683 }
684 return true;
685 }
686
CheckSectionRefs()687 bool CFilesDatabase::CheckSectionRefs()
688 {
689 FOR_VECTOR (i, Indices)
690 {
691 const CItem &item = Items[Indices[i]];
692 if (item.Section == 0 || item.IsDir())
693 continue;
694 if (item.Section >= Sections.Size())
695 return false;
696 }
697 return true;
698 }
699
GetLog(UInt32 num)700 static int inline GetLog(UInt32 num)
701 {
702 for (int i = 0; i < 32; i++)
703 if (((UInt32)1 << i) == num)
704 return i;
705 return -1;
706 }
707
OpenHighLevel(IInStream * inStream,CFilesDatabase & database)708 HRESULT CInArchive::OpenHighLevel(IInStream *inStream, CFilesDatabase &database)
709 {
710 {
711 // The NameList file
712 RINOK(DecompressStream(inStream, database, (AString)kNameList))
713 /* UInt16 length = */ ReadUInt16();
714 UInt16 numSections = ReadUInt16();
715 for (unsigned i = 0; i < numSections; i++)
716 {
717 CSectionInfo section;
718 const unsigned nameLen = ReadUInt16();
719 UString name;
720 ReadUString(nameLen, name);
721 if (ReadUInt16() != 0)
722 return S_FALSE;
723 ConvertUnicodeToUTF8(name, section.Name);
724 // if (!ConvertUnicodeToUTF8(name, section.Name)) return S_FALSE;
725 database.Sections.Add(section);
726 }
727 }
728
729 unsigned si;
730 for (si = 1; si < database.Sections.Size(); si++)
731 {
732 CSectionInfo §ion = database.Sections[si];
733 AString sectionPrefix (GetSectionPrefix(section.Name));
734 {
735 // Content
736 int index = database.FindItem(sectionPrefix + kContent);
737 if (index < 0)
738 return S_FALSE;
739 const CItem &item = database.Items[index];
740 section.Offset = item.Offset;
741 section.CompressedSize = item.Size;
742 }
743 AString transformPrefix (sectionPrefix + kTransform);
744 if (database.Help2Format)
745 {
746 // Transform List
747 RINOK(DecompressStream(inStream, database, transformPrefix + kTransformList))
748 if ((_chunkSize & 0xF) != 0)
749 return S_FALSE;
750 unsigned numGuids = (unsigned)(_chunkSize / 0x10);
751 if (numGuids < 1)
752 return S_FALSE;
753 for (unsigned i = 0; i < numGuids; i++)
754 {
755 CMethodInfo method;
756 ReadGUID(method.Guid);
757 section.Methods.Add(method);
758 }
759 }
760 else
761 {
762 CMethodInfo method;
763 memcpy(method.Guid, kChmLzxGuid, 16);
764 section.Methods.Add(method);
765 }
766
767 {
768 // Control Data
769 RINOK(DecompressStream(inStream, database, sectionPrefix + kControlData))
770
771 FOR_VECTOR (mi, section.Methods)
772 {
773 CMethodInfo &method = section.Methods[mi];
774 UInt32 numDWORDS = ReadUInt32();
775 if (method.IsLzx())
776 {
777 if (numDWORDS < 5)
778 return S_FALSE;
779 if (ReadUInt32() != kSignature_LZXC)
780 return S_FALSE;
781 CLzxInfo &li = method.LzxInfo;
782 li.Version = ReadUInt32();
783 if (li.Version != 2 && li.Version != 3)
784 return S_FALSE;
785
786 {
787 // There is bug in VC6, if we use function call as parameter for inline function
788 const UInt32 val32 = ReadUInt32();
789 const int n = GetLog(val32);
790 if (n < 0 || n > 16)
791 return S_FALSE;
792 li.ResetIntervalBits = (unsigned)n;
793 }
794
795 {
796 const UInt32 val32 = ReadUInt32();
797 const int n = GetLog(val32);
798 if (n < 0 || n > 16)
799 return S_FALSE;
800 li.WindowSizeBits = (unsigned)n;
801 }
802
803 li.CacheSize = ReadUInt32();
804 numDWORDS -= 5;
805 if (numDWORDS)
806 do
807 ReadUInt32();
808 while (--numDWORDS);
809 }
810 else
811 {
812 if (numDWORDS > 1u << 27)
813 return S_FALSE;
814 const size_t numBytes = (size_t)numDWORDS * 4;
815 // method.ControlData.Alloc(numBytes);
816 // ReadBytes(method.ControlData, numBytes);
817 Skip(numBytes);
818 }
819 }
820 }
821
822 {
823 // SpanInfo
824 RINOK(DecompressStream(inStream, database, sectionPrefix + kSpanInfo))
825 section.UncompressedSize = ReadUInt64();
826 }
827
828 // read ResetTable for LZX
829 FOR_VECTOR (mi, section.Methods)
830 {
831 CMethodInfo &method = section.Methods[mi];
832 if (method.IsLzx())
833 {
834 // ResetTable;
835 RINOK(DecompressStream(inStream, database, transformPrefix +
836 method.GetGuidString() + kResetTable))
837 CResetTable &rt = method.LzxInfo.ResetTable;
838
839 if (_chunkSize < 4)
840 {
841 if (_chunkSize != 0)
842 return S_FALSE;
843 // ResetTable is empty in .chw files
844 if (section.UncompressedSize != 0)
845 return S_FALSE;
846 rt.UncompressedSize = 0;
847 rt.CompressedSize = 0;
848 // rt.BlockSize = 0;
849 }
850 else
851 {
852 const UInt32 ver = ReadUInt32(); // 2 unknown (possibly a version number)
853 if (ver != 2 && ver != 3)
854 return S_FALSE;
855 const UInt32 numEntries = ReadUInt32();
856 const unsigned kEntrySize = 8;
857 if (ReadUInt32() != kEntrySize)
858 return S_FALSE;
859 const unsigned kRtHeaderSize = 4 * 4 + 8 * 3;
860 if (ReadUInt32() != kRtHeaderSize)
861 return S_FALSE;
862 if (kRtHeaderSize + kEntrySize * (UInt64)numEntries != _chunkSize)
863 return S_FALSE;
864
865 rt.UncompressedSize = ReadUInt64();
866 rt.CompressedSize = ReadUInt64();
867 UInt64 blockSize = ReadUInt64();
868 if (blockSize != kBlockSize)
869 return S_FALSE;
870 UInt64 numBlocks = (rt.UncompressedSize + kBlockSize + 1) / kBlockSize;
871 if (numEntries != numBlocks &&
872 numEntries != numBlocks + 1)
873 return S_FALSE;
874
875 rt.ResetOffsets.ClearAndReserve(numEntries);
876
877 for (UInt32 i = 0; i < numEntries; i++)
878 {
879 UInt64 v = ReadUInt64();
880 if (i != 0 && v < rt.ResetOffsets[i - 1])
881 return S_FALSE;
882 rt.ResetOffsets.AddInReserved(v);
883 }
884
885 if (numEntries != 0)
886 if (rt.ResetOffsets[0] != 0)
887 return S_FALSE;
888
889 if (numEntries == numBlocks + 1)
890 {
891 // Lazarus 9-26-2 chm contains additional entty
892 if (rt.ResetOffsets.Back() != rt.CompressedSize)
893 return S_FALSE;
894 }
895 }
896 }
897 }
898 }
899
900 database.SetIndices();
901 database.Sort();
902 return database.Check() ? S_OK : S_FALSE;
903 }
904
Open2(IInStream * inStream,const UInt64 * searchHeaderSizeLimit,CFilesDatabase & database)905 HRESULT CInArchive::Open2(IInStream *inStream,
906 const UInt64 *searchHeaderSizeLimit,
907 CFilesDatabase &database)
908 {
909 IsArc = false;
910 HeadersError = false;
911 UnexpectedEnd = false;
912 UnsupportedFeature = false;
913
914 database.Clear();
915 database.Help2Format = _help2;
916 const UInt32 chmVersion = 3;
917
918 RINOK(InStream_GetPos(inStream, database.StartPosition))
919
920 if (!_inBuffer.Create(1 << 14))
921 return E_OUTOFMEMORY;
922 _inBuffer.SetStream(inStream);
923 _inBuffer.Init();
924
925 if (_help2)
926 {
927 const unsigned kSignatureSize = 8;
928 const UInt64 signature = ((UInt64)kSignature_ITLS << 32) | kSignature_ITOL;
929 UInt64 limit = 1 << 18;
930
931 if (searchHeaderSizeLimit)
932 if (limit > *searchHeaderSizeLimit)
933 limit = *searchHeaderSizeLimit;
934
935 UInt64 val = 0;
936
937 for (;;)
938 {
939 Byte b;
940 if (!_inBuffer.ReadByte(b))
941 return S_FALSE;
942 val >>= 8;
943 val |= ((UInt64)b) << ((kSignatureSize - 1) * 8);
944 if (_inBuffer.GetProcessedSize() >= kSignatureSize)
945 {
946 if (val == signature)
947 break;
948 if (_inBuffer.GetProcessedSize() > limit)
949 return S_FALSE;
950 }
951 }
952
953 database.StartPosition += _inBuffer.GetProcessedSize() - kSignatureSize;
954 RINOK(OpenHelp2(inStream, database))
955 if (database.NewFormat)
956 return S_OK;
957 }
958 else
959 {
960 if (ReadUInt32() != kSignature_ITSF)
961 return S_FALSE;
962 if (ReadUInt32() != chmVersion)
963 return S_FALSE;
964 RINOK(OpenChm(inStream, database))
965 }
966
967
968 #ifndef CHM_LOW
969
970 try
971 {
972 try
973 {
974 HRESULT res = OpenHighLevel(inStream, database);
975 if (res == S_FALSE)
976 {
977 UnsupportedFeature = true;
978 database.HighLevelClear();
979 return S_OK;
980 }
981 RINOK(res)
982 if (!database.CheckSectionRefs())
983 HeadersError = true;
984 database.LowLevel = false;
985 }
986 catch(...)
987 {
988 database.HighLevelClear();
989 throw;
990 }
991 }
992 // catch(const CInBufferException &e) { return e.ErrorCode; }
993 catch(CEnexpectedEndException &) { UnexpectedEnd = true; }
994 catch(CHeaderErrorException &) { HeadersError = true; }
995 catch(...) { throw; }
996
997 #endif
998
999 return S_OK;
1000 }
1001
Open(IInStream * inStream,const UInt64 * searchHeaderSizeLimit,CFilesDatabase & database)1002 HRESULT CInArchive::Open(IInStream *inStream,
1003 const UInt64 *searchHeaderSizeLimit,
1004 CFilesDatabase &database)
1005 {
1006 try
1007 {
1008 try
1009 {
1010 HRESULT res = Open2(inStream, searchHeaderSizeLimit, database);
1011 m_InStreamRef.Release();
1012 return res;
1013 }
1014 catch(...)
1015 {
1016 m_InStreamRef.Release();
1017 throw;
1018 }
1019 }
1020 catch(const CInBufferException &e) { return e.ErrorCode; }
1021 catch(CEnexpectedEndException &) { UnexpectedEnd = true; }
1022 catch(CHeaderErrorException &) { HeadersError = true; }
1023 return S_FALSE;
1024 }
1025
1026 }}
1027