xref: /aosp_15_r20/external/lzma/CPP/7zip/UI/Common/Bench.cpp (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 // Bench.cpp
2 
3 #include "StdAfx.h"
4 
5 // #include <stdio.h>
6 
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif // _WIN32
11 
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #include <unistd.h>
15 #ifdef USE_POSIX_TIME2
16 #include <sys/time.h>
17 #include <sys/times.h>
18 #endif
19 #endif // USE_POSIX_TIME
20 
21 #ifdef _WIN32
22 #define USE_ALLOCA
23 #endif
24 
25 #ifdef USE_ALLOCA
26 #ifdef _WIN32
27 #include <malloc.h>
28 #else
29 #include <stdlib.h>
30 #endif
31 #define BENCH_ALLOCA_VALUE(index) (((index) * 64 * 21) & 0x7FF)
32 #endif
33 
34 #include "../../../../C/7zCrc.h"
35 #include "../../../../C/RotateDefs.h"
36 #include "../../../../C/CpuArch.h"
37 
38 #ifndef Z7_ST
39 #include "../../../Windows/Synchronization.h"
40 #include "../../../Windows/Thread.h"
41 #endif
42 
43 #include "../../../Windows/FileFind.h"
44 #include "../../../Windows/FileIO.h"
45 #include "../../../Windows/SystemInfo.h"
46 
47 #include "../../../Common/MyBuffer2.h"
48 #include "../../../Common/IntToString.h"
49 #include "../../../Common/StringConvert.h"
50 #include "../../../Common/StringToInt.h"
51 #include "../../../Common/Wildcard.h"
52 
53 #include "../../Common/MethodProps.h"
54 #include "../../Common/StreamObjects.h"
55 #include "../../Common/StreamUtils.h"
56 
57 #include "Bench.h"
58 
59 using namespace NWindows;
60 
#ifndef Z7_ST
// Numeric id constant named after LZMA; only compiled when Z7_ST is not defined.
static const UInt32 k_LZMA = 0x030101;
#endif

// Value that SetComplexCommandsMs() writes to its output before any
// frequency-based adjustment: 2^31 when UNDER_CE is defined, 2^34 otherwise.
static const UInt64 kComplexInCommands = (UInt64)1 <<
  #ifdef UNDER_CE
    31;
  #else
    34;
  #endif

// A duration constant in milliseconds.
// NOTE(review): presumably the default complexInMs handed to
// SetComplexCommandsMs() - confirm against the callers.
static const UInt32 kComplexInMs = 4000;
73 
SetComplexCommandsMs(UInt32 complexInMs,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)74 static void SetComplexCommandsMs(UInt32 complexInMs,
75     bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
76 {
77   complexInCommands = kComplexInCommands;
78   const UInt64 kMinFreq = (UInt64)1000000 * 4;
79   const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
80   if (cpuFreq < kMinFreq && !isSpecifiedFreq)
81     cpuFreq = kMinFreq;
82   if (cpuFreq < kMaxFreq || isSpecifiedFreq)
83   {
84     if (complexInMs != 0)
85       complexInCommands = complexInMs * cpuFreq / 1000;
86     else
87       complexInCommands = cpuFreq >> 2;
88   }
89 }
90 
91 // const UInt64 kBenchmarkUsageMult = 1000000; // for debug
92 static const unsigned kBenchmarkUsageMultBits = 16;
93 static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;
94 
Benchmark_GetUsage_Percents(UInt64 usage)95 UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
96 {
97   return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult;
98 }
99 
// NOTE(review): presumably log2 of a hash benchmark dictionary size - confirm at use site.
static const unsigned kNumHashDictBits = 17;
// 47 KiB. NOTE(review): presumably the data size used for filter benchmarks - confirm at use site.
static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test

// NOTE(review): purpose is not visible in this part of the file - confirm at use site.
static const unsigned kOldLzmaDictBits = 32;

// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
// 64 KiB. NOTE(review): presumably extra bytes added to a benchmark buffer - confirm at use site.
static const size_t kAdditionalSize = (size_t)1 << 16;
// 1 KiB: fixed term of the size computed by GetBenchCompressedSize()
static const size_t kCompressedAdditionalSize = 1 << 10;

// 64 bytes: capacity of CEncoderInfo::propsData
static const UInt32 kMaxMethodPropSize = 1 << 6;
110 
111 
/*
  ALLOC_WITH_HRESULT(buffer, size):
    calls (buffer)->Alloc(size) and makes the ENCLOSING function return
    E_OUTOFMEMORY, if (size) is non-zero and the buffer is not allocated after that.
  The expansion is a braced block, so call sites write it without trailing ';'.
  (_size_) is parenthesized in the condition, so that expression arguments
  (for example: a ? b : c) are not regrouped by the && operator.
  NOTE: (_size_) and (_buffer_) are expanded more than once:
        pass expressions without side effects.
*/
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { (_buffer_)->Alloc(_size_); \
  if ((_size_) && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
115 
116 
117 class CBaseRandomGenerator
118 {
119   UInt32 A1;
120   UInt32 A2;
121   UInt32 Salt;
122 public:
CBaseRandomGenerator(UInt32 salt=0)123   CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
Init()124   void Init() { A1 = 362436069; A2 = 521288629;}
125   Z7_FORCE_INLINE
GetRnd()126   UInt32 GetRnd()
127   {
128 #if 0
129     // for debug:
130     return 0x0c080400;
131     // return 0;
132 #else
133     return Salt ^
134     (
135       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
136       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
137     );
138 #endif
139   }
140 };
141 
142 
143 static const size_t k_RandBuf_AlignMask = 4 - 1;
144 
145 Z7_NO_INLINE
RandGen_BufAfterPad(Byte * buf,size_t size)146 static void RandGen_BufAfterPad(Byte *buf, size_t size)
147 {
148   CBaseRandomGenerator RG;
149   for (size_t i = 0; i < size; i += 4)
150   {
151     const UInt32 v = RG.GetRnd();
152     SetUi32a(buf + i, v)
153   }
154   /*
155   UInt32 v = RG.GetRnd();
156   for (; i < size; i++)
157   {
158     buf[i] = (Byte)v;
159     v >>= 8;
160   }
161   */
162 }
163 
164 
165 class CBenchRandomGenerator: public CMidAlignedBuffer
166 {
GetVal(UInt32 & res,unsigned numBits)167   static UInt32 GetVal(UInt32 &res, unsigned numBits)
168   {
169     const UInt32 val = res & (((UInt32)1 << numBits) - 1);
170     res >>= numBits;
171     return val;
172   }
173 
GetLen(UInt32 & r)174   static UInt32 GetLen(UInt32 &r)
175   {
176     const unsigned len = (unsigned)GetVal(r, 2);
177     return GetVal(r, 1 + len);
178   }
179 
180 public:
181 
GenerateSimpleRandom(UInt32 salt)182   void GenerateSimpleRandom(UInt32 salt)
183   {
184     CBaseRandomGenerator rg(salt);
185     const size_t bufSize = Size();
186     Byte *buf = (Byte *)*this;
187     for (size_t i = 0; i < bufSize; i++)
188       buf[i] = (Byte)rg.GetRnd();
189   }
190 
GenerateLz(unsigned dictBits,UInt32 salt)191   void GenerateLz(unsigned dictBits, UInt32 salt)
192   {
193     CBaseRandomGenerator rg(salt);
194     size_t pos = 0;
195     size_t rep0 = 1;
196     const size_t bufSize = Size();
197     Byte *buf = (Byte *)*this;
198     unsigned posBits = 1;
199 
200     // printf("\n dictBits = %d\n", (UInt32)dictBits);
201     // printf("\n bufSize = 0x%p\n", (const void *)bufSize);
202 
203     while (pos < bufSize)
204     {
205       /*
206       if (pos >= ((UInt32)1 << 31))
207         printf(" %x\n", pos);
208       */
209       UInt32 r = rg.GetRnd();
210       if (GetVal(r, 1) == 0 || pos < 1024)
211         buf[pos++] = (Byte)(r & 0xFF);
212       else
213       {
214         UInt32 len;
215         len = 1 + GetLen(r);
216 
217         if (GetVal(r, 3) != 0)
218         {
219           len += GetLen(r);
220 
221           while (((size_t)1 << posBits) < pos)
222             posBits++;
223 
224           unsigned numBitsMax = dictBits;
225           if (numBitsMax > posBits)
226             numBitsMax = posBits;
227 
228           const unsigned kAddBits = 6;
229           unsigned numLogBits = 5;
230           if (numBitsMax <= (1 << 4) - 1 + kAddBits)
231             numLogBits = 4;
232 
233           for (;;)
234           {
235             const UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
236             r = rg.GetRnd();
237             if (ppp > numBitsMax)
238               continue;
239             // rep0 = GetVal(r, ppp);
240             rep0 = r & (((size_t)1 << ppp) - 1);
241             if (rep0 < pos)
242               break;
243             r = rg.GetRnd();
244           }
245           rep0++;
246         }
247 
248         // len *= 300; // for debug
249         {
250           const size_t rem = bufSize - pos;
251           if (len > rem)
252             len = (UInt32)rem;
253         }
254         Byte *dest = buf + pos;
255         const Byte *src = dest - rep0;
256         pos += len;
257         for (UInt32 i = 0; i < len; i++)
258           *dest++ = *src++;
259       }
260     }
261     // printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
262   }
263 };
264 
265 
266 Z7_CLASS_IMP_NOQIB_1(
267   CBenchmarkInStream
268   , ISequentialInStream
269 )
270   const Byte *Data;
271   size_t Pos;
272   size_t Size;
273 public:
274   void Init(const Byte *data, size_t size)
275   {
276     Data = data;
277     Size = size;
278     Pos = 0;
279   }
280   bool WasFinished() const { return Pos == Size; }
281 };
282 
283 Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize))
284 {
285   const UInt32 kMaxBlockSize = (1 << 20);
286   if (size > kMaxBlockSize)
287     size = kMaxBlockSize;
288   const size_t remain = Size - Pos;
289   if (size > remain)
290     size = (UInt32)remain;
291 
292   if (size)
293     memcpy(data, Data + Pos, size);
294 
295   Pos += size;
296   if (processedSize)
297     *processedSize = size;
298   return S_OK;
299 }
300 
301 
302 class CBenchmarkOutStream Z7_final:
303   public ISequentialOutStream,
304   public CMyUnknownImp,
305   public CMidAlignedBuffer
306 {
307   Z7_COM_UNKNOWN_IMP_0
308   Z7_IFACE_COM7_IMP(ISequentialOutStream)
309   // bool _overflow;
310 public:
311   size_t Pos;
312   bool RealCopy;
313   bool CalcCrc;
314   UInt32 Crc;
315 
316   // CBenchmarkOutStream(): _overflow(false) {}
317   void Init(bool realCopy, bool calcCrc)
318   {
319     Crc = CRC_INIT_VAL;
320     RealCopy = realCopy;
321     CalcCrc = calcCrc;
322     // _overflow = false;
323     Pos = 0;
324   }
325 
326   void InitCrc()
327   {
328     Crc = CRC_INIT_VAL;
329   }
330 
331   void Calc(const void *data, size_t size)
332   {
333     Crc = CrcUpdate(Crc, data, size);
334   }
335 
336   size_t GetPos() const { return Pos; }
337 
338   // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
339 };
340 
341 Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
342 {
343   size_t curSize = Size() - Pos;
344   if (curSize > size)
345     curSize = size;
346   if (curSize != 0)
347   {
348     if (RealCopy)
349       memcpy(((Byte *)*this) + Pos, data, curSize);
350     if (CalcCrc)
351       Calc(data, curSize);
352     Pos += curSize;
353   }
354   if (processedSize)
355     *processedSize = (UInt32)curSize;
356   if (curSize != size)
357   {
358     // _overflow = true;
359     return E_FAIL;
360   }
361   return S_OK;
362 }
363 
364 
365 Z7_CLASS_IMP_NOQIB_1(
366   CCrcOutStream
367   , ISequentialOutStream
368 )
369 public:
370   bool CalcCrc;
371   UInt32 Crc;
372   UInt64 Pos;
373 
374   CCrcOutStream(): CalcCrc(true) {}
375   void Init() { Crc = CRC_INIT_VAL; Pos = 0; }
376   void Calc(const void *data, size_t size)
377   {
378     Crc = CrcUpdate(Crc, data, size);
379   }
380 };
381 
382 Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
383 {
384   if (CalcCrc)
385     Calc(data, size);
386   Pos += size;
387   if (processedSize)
388     *processedSize = size;
389   return S_OK;
390 }
391 
392 // #include "../../../../C/My_sys_time.h"
393 
394 static UInt64 GetTimeCount()
395 {
396   #ifdef USE_POSIX_TIME
397   #ifdef USE_POSIX_TIME2
398   timeval v;
399   if (gettimeofday(&v, NULL) == 0)
400     return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec;
401   return (UInt64)time(NULL) * 1000000;
402   #else
403   return time(NULL);
404   #endif
405   #else
406   LARGE_INTEGER value;
407   if (::QueryPerformanceCounter(&value))
408     return (UInt64)value.QuadPart;
409   return GetTickCount();
410   #endif
411 }
412 
413 static UInt64 GetFreq()
414 {
415   #ifdef USE_POSIX_TIME
416   #ifdef USE_POSIX_TIME2
417   return 1000000;
418   #else
419   return 1;
420   #endif
421   #else
422   LARGE_INTEGER value;
423   if (::QueryPerformanceFrequency(&value))
424     return (UInt64)value.QuadPart;
425   return 1000;
426   #endif
427 }
428 
429 
430 #ifdef USE_POSIX_TIME
431 
432 struct CUserTime
433 {
434   UInt64 Sum;
435   clock_t Prev;
436 
437   void Init()
438   {
439     // Prev = clock();
440     Sum = 0;
441     Prev = 0;
442     Update();
443     Sum = 0;
444   }
445 
446   void Update()
447   {
448     tms t;
449     /* clock_t res = */ times(&t);
450     clock_t newVal = t.tms_utime + t.tms_stime;
451     Sum += (UInt64)(newVal - Prev);
452     Prev = newVal;
453 
454     /*
455     clock_t v = clock();
456     if (v != -1)
457     {
458       Sum += v - Prev;
459       Prev = v;
460     }
461     */
462   }
463   UInt64 GetUserTime()
464   {
465     Update();
466     return Sum;
467   }
468 };
469 
470 #else
471 
472 
473 struct CUserTime
474 {
475   bool UseTick;
476   DWORD Prev_Tick;
477   UInt64 Prev;
478   UInt64 Sum;
479 
480   void Init()
481   {
482     UseTick = false;
483     Prev_Tick = 0;
484     Prev = 0;
485     Sum = 0;
486     Update();
487     Sum = 0;
488   }
489   UInt64 GetUserTime()
490   {
491     Update();
492     return Sum;
493   }
494   void Update();
495 };
496 
497 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
498 
499 void CUserTime::Update()
500 {
501   DWORD new_Tick = GetTickCount();
502   FILETIME creationTime, exitTime, kernelTime, userTime;
503   if (!UseTick &&
504       #ifdef UNDER_CE
505         ::GetThreadTimes(::GetCurrentThread()
506       #else
507         ::GetProcessTimes(::GetCurrentProcess()
508       #endif
509       , &creationTime, &exitTime, &kernelTime, &userTime))
510   {
511     UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime);
512     Sum += newVal - Prev;
513     Prev = newVal;
514   }
515   else
516   {
517     UseTick = true;
518     Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000;
519   }
520   Prev_Tick = new_Tick;
521 }
522 
523 
524 #endif
525 
526 static UInt64 GetUserFreq()
527 {
528   #ifdef USE_POSIX_TIME
529   // return CLOCKS_PER_SEC;
530   return (UInt64)sysconf(_SC_CLK_TCK);
531   #else
532   return 10000000;
533   #endif
534 }
535 
536 class CBenchProgressStatus Z7_final
537 {
538   #ifndef Z7_ST
539   NSynchronization::CCriticalSection CS;
540   #endif
541 public:
542   HRESULT Res;
543   bool EncodeMode;
544   void SetResult(HRESULT res)
545   {
546     #ifndef Z7_ST
547     NSynchronization::CCriticalSectionLock lock(CS);
548     #endif
549     Res = res;
550   }
551   HRESULT GetResult()
552   {
553     #ifndef Z7_ST
554     NSynchronization::CCriticalSectionLock lock(CS);
555     #endif
556     return Res;
557   }
558 };
559 
560 struct CBenchInfoCalc
561 {
562   CBenchInfo BenchInfo;
563   CUserTime UserTime;
564 
565   void SetStartTime();
566   void SetFinishTime(CBenchInfo &dest);
567 };
568 
569 void CBenchInfoCalc::SetStartTime()
570 {
571   BenchInfo.GlobalFreq = GetFreq();
572   BenchInfo.UserFreq = GetUserFreq();
573   BenchInfo.GlobalTime = ::GetTimeCount();
574   BenchInfo.UserTime = 0;
575   UserTime.Init();
576 }
577 
578 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
579 {
580   dest = BenchInfo;
581   dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
582   dest.UserTime = UserTime.GetUserTime();
583 }
584 
585 class CBenchProgressInfo Z7_final:
586   public ICompressProgressInfo,
587   public CMyUnknownImp,
588   public CBenchInfoCalc
589 {
590   Z7_COM_UNKNOWN_IMP_0
591   Z7_IFACE_COM7_IMP(ICompressProgressInfo)
592 public:
593   CBenchProgressStatus *Status;
594   IBenchCallback *Callback;
595 
596   CBenchProgressInfo(): Callback(NULL) {}
597 };
598 
599 
600 Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize))
601 {
602   HRESULT res = Status->GetResult();
603   if (res != S_OK)
604     return res;
605   if (!Callback)
606     return res;
607 
608   /*
609   static UInt64 inSizePrev = 0;
610   static UInt64 outSizePrev = 0;
611   UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
612   if (inSize)   { val1 = *inSize;  delta1 = val1 - inSizePrev;  inSizePrev  = val1; }
613   if (outSize)  { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2;  }
614   UInt64 percents = delta2 * 1000;
615   if (delta1 != 0)
616     percents /= delta1;
617   printf("=== %7d %7d     %7d %7d  ratio = %4d\n",
618       (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
619       (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
620       (unsigned)percents);
621   */
622 
623   CBenchInfo info;
624   SetFinishTime(info);
625   if (Status->EncodeMode)
626   {
627     info.UnpackSize = BenchInfo.UnpackSize + *inSize;
628     info.PackSize = BenchInfo.PackSize + *outSize;
629     res = Callback->SetEncodeResult(info, false);
630   }
631   else
632   {
633     info.PackSize = BenchInfo.PackSize + *inSize;
634     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
635     res = Callback->SetDecodeResult(info, false);
636   }
637   if (res != S_OK)
638     Status->SetResult(res);
639   return res;
640 }
641 
642 static const unsigned kSubBits = 8;
643 
644 static unsigned GetLogSize(UInt64 size)
645 {
646   unsigned i = 0;
647   for (;;)
648   {
649     i++;  size >>= 1;  if (size == 0) break;
650   }
651   return i;
652 }
653 
654 
655 static UInt32 GetLogSize_Sub(UInt64 size)
656 {
657   if (size <= 1)
658     return 0;
659   const unsigned i = GetLogSize(size) - 1;
660   UInt32 v;
661   if (i <= kSubBits)
662     v = (UInt32)(size) << (kSubBits - i);
663   else
664     v = (UInt32)(size >> (i - kSubBits));
665   return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1));
666 }
667 
668 
669 static UInt64 Get_UInt64_from_double(double v)
670 {
671   const UInt64 kMaxVal = (UInt64)1 << 62;
672   if (v > (double)(Int64)kMaxVal)
673     return kMaxVal;
674   return (UInt64)v;
675 }
676 
677 static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d)
678 {
679   if (d == 0)
680     d = 1;
681   const double v =
682       (double)(Int64)m1 *
683       (double)(Int64)m2 /
684       (double)(Int64)d;
685   return Get_UInt64_from_double(v);
686   /*
687   unsigned n1 = GetLogSize(m1);
688   unsigned n2 = GetLogSize(m2);
689   while (n1 + n2 > 64)
690   {
691     if (n1 >= n2)
692     {
693       m1 >>= 1;
694       n1--;
695     }
696     else
697     {
698       m2 >>= 1;
699       n2--;
700     }
701     d >>= 1;
702   }
703 
704   if (d == 0)
705     d = 1;
706   return m1 * m2 / d;
707   */
708 }
709 
710 
711 UInt64 CBenchInfo::GetUsage() const
712 {
713   UInt64 userTime = UserTime;
714   UInt64 userFreq = UserFreq;
715   UInt64 globalTime = GlobalTime;
716   UInt64 globalFreq = GlobalFreq;
717 
718   if (userFreq == 0)
719     userFreq = 1;
720   if (globalTime == 0)
721     globalTime = 1;
722 
723   const double v =
724         ((double)(Int64)userTime / (double)(Int64)userFreq)
725       * ((double)(Int64)globalFreq / (double)(Int64)globalTime)
726       * (double)(Int64)kBenchmarkUsageMult;
727   return Get_UInt64_from_double(v);
728   /*
729   return MyMultDiv64(
730         MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq),
731         globalFreq, globalTime);
732   */
733 }
734 
735 
736 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
737 {
738   if (UserTime == 0)
739   {
740     return 0;
741     // userTime = 1;
742   }
743   UInt64 globalFreq = GlobalFreq;
744   if (globalFreq == 0)
745     globalFreq = 1;
746 
747   const double v =
748         ((double)(Int64)GlobalTime / (double)(Int64)globalFreq)
749       * ((double)(Int64)UserFreq  / (double)(Int64)UserTime)
750       * (double)(Int64)rating;
751   return Get_UInt64_from_double(v);
752   /*
753   return MyMultDiv64(
754         MyMultDiv64(rating, UserFreq, UserTime),
755         GlobalTime, globalFreq);
756   */
757 }
758 
759 
760 UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
761 {
762   return MyMultDiv64(numUnits, GlobalFreq, GlobalTime);
763 }
764 
765 static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity)
766 {
767   return complexity >= 0 ?
768       size * (UInt32)complexity :
769       size / (UInt32)(-complexity);
770 }
771 
772 struct CBenchProps
773 {
774   bool LzmaRatingMode;
775 
776   Int32 EncComplex;
777   Int32 DecComplexCompr;
778   Int32 DecComplexUnc;
779 
780   unsigned KeySize;
781 
782   CBenchProps():
783       LzmaRatingMode(false),
784       KeySize(0)
785     {}
786 
787   void SetLzmaCompexity();
788 
789   UInt64 GetNumCommands_Enc(UInt64 unpackSize) const
790   {
791     const UInt32 kMinSize = 100;
792     if (unpackSize < kMinSize)
793       unpackSize = kMinSize;
794     return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex);
795   }
796 
797   UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const
798   {
799     return
800         GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) +
801         GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc);
802   }
803 
804   UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const;
805   UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const;
806 };
807 
808 void CBenchProps::SetLzmaCompexity()
809 {
810   EncComplex = 1200;
811   DecComplexUnc = 4;
812   DecComplexCompr = 190;
813   LzmaRatingMode = true;
814 }
815 
816 UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const
817 {
818   if (dictSize < (1 << kBenchMinDicLogSize))
819     dictSize = (1 << kBenchMinDicLogSize);
820   Int32 encComplex = EncComplex;
821   if (LzmaRatingMode)
822   {
823     /*
824     for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
825     {
826       unsigned rr = GetLogSize_Sub(uu);
827       printf("\n%16I64x , log = %4x", uu, rr);
828       uu += 1;
829       uu += uu / 50;
830     }
831     */
832     // throw 1;
833     const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits);
834     encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
835   }
836   const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex);
837   return MyMultDiv64(numCommands, freq, elapsedTime);
838 }
839 
840 UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const
841 {
842   const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations;
843   return MyMultDiv64(numCommands, freq, elapsedTime);
844 }
845 
846 
847 
848 UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
849 {
850   CBenchProps props;
851   props.SetLzmaCompexity();
852   return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
853 }
854 
855 UInt64 CBenchInfo::GetRating_LzmaDec() const
856 {
857   CBenchProps props;
858   props.SetLzmaCompexity();
859   return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
860 }
861 
862 
863 #ifndef Z7_ST
864 
865 #define NUM_CPU_LEVELS_MAX 3
866 
867 struct CAffinityMode
868 {
869   unsigned NumBundleThreads;
870   unsigned NumLevels;
871   unsigned NumCoreThreads;
872   unsigned NumCores;
873   // unsigned DivideNum;
874   UInt32 Sizes[NUM_CPU_LEVELS_MAX];
875 
876   void SetLevels(unsigned numCores, unsigned numCoreThreads);
877   DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;
878   bool NeedAffinity() const { return NumBundleThreads != 0; }
879 
880   WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
881   {
882     if (NeedAffinity())
883     {
884       CCpuSet cpuSet;
885       GetAffinityMask(bundleIndex, &cpuSet);
886       return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet);
887     }
888     return thread.Create(startAddress, parameter);
889   }
890 
891   CAffinityMode():
892     NumBundleThreads(0),
893     NumLevels(0),
894     NumCoreThreads(1)
895     // DivideNum(1)
896     {}
897 };
898 
899 void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads)
900 {
901   NumCores = numCores;
902   NumCoreThreads = numCoreThreads;
903   NumLevels = 0;
904   if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0)
905     return;
906   UInt32 c = numCores / numCoreThreads;
907   UInt32 c2 = 1;
908   while ((c & 1) == 0)
909   {
910     c >>= 1;
911     c2 <<= 1;
912   }
913   if (c2 != 1)
914     Sizes[NumLevels++] = c2;
915   if (c != 1)
916     Sizes[NumLevels++] = c;
917   if (numCoreThreads != 1)
918     Sizes[NumLevels++] = numCoreThreads;
919   if (NumLevels == 0)
920     Sizes[NumLevels++] = 1;
921 
922   /*
923   printf("\n Cores:");
924   for (unsigned i = 0; i < NumLevels; i++)
925   {
926     printf(" %d", Sizes[i]);
927   }
928   printf("\n");
929   */
930 }
931 
932 
933 DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const
934 {
935   CpuSet_Zero(cpuSet);
936 
937   if (NumLevels == 0)
938     return 0;
939 
940   // printf("\n%2d", bundleIndex);
941 
942   /*
943   UInt32 low = 0;
944   if (DivideNum != 1)
945   {
946     low = bundleIndex % DivideNum;
947     bundleIndex /= DivideNum;
948   }
949   */
950 
951   UInt32 numGroups = NumCores / NumBundleThreads;
952   UInt32 m = bundleIndex % numGroups;
953   UInt32 v = 0;
954   for (unsigned i = 0; i < NumLevels; i++)
955   {
956     UInt32 size = Sizes[i];
957     while ((size & 1) == 0)
958     {
959       v *= 2;
960       v |= (m & 1);
961       m >>= 1;
962       size >>= 1;
963     }
964     v *= size;
965     v += m % size;
966     m /= size;
967   }
968 
969   // UInt32 nb = NumBundleThreads / DivideNum;
970   UInt32 nb = NumBundleThreads;
971 
972   DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1;
973   // v += low;
974   mask <<= v;
975 
976   // printf(" %2d %8x \n ", v, (unsigned)mask);
977   #ifdef _WIN32
978     *cpuSet = mask;
979   #else
980   {
981     for (unsigned k = 0; k < nb; k++)
982       CpuSet_Set(cpuSet, v + k);
983   }
984   #endif
985 
986   return mask;
987 }
988 
989 
990 struct CBenchSyncCommon
991 {
992   bool ExitMode;
993   NSynchronization::CManualResetEvent StartEvent;
994 
995   CBenchSyncCommon(): ExitMode(false) {}
996 };
997 
998 #endif
999 
1000 
1001 
// Type of CEncoderInfo::CheckCrcMode_Dec (k_CheckCrcMode_Always is its default there).
// NOTE(review): presumably selects in which decoding passes the CRC of
// unpacked data is verified - confirm in CEncoderInfo::Decode().
enum E_CheckCrcMode
{
  k_CheckCrcMode_Never = 0,
  k_CheckCrcMode_Always = 1,
  k_CheckCrcMode_FirstPass = 2
};
1008 
1009 class CEncoderInfo;
1010 
1011 class CEncoderInfo Z7_final
1012 {
1013   Z7_CLASS_NO_COPY(CEncoderInfo)
1014 
1015 public:
1016 
1017   #ifndef Z7_ST
1018   NWindows::CThread thread[2];
1019   NSynchronization::CManualResetEvent ReadyEvent;
1020   UInt32 NumDecoderSubThreads;
1021   CBenchSyncCommon *Common;
1022   UInt32 EncoderIndex;
1023   UInt32 NumEncoderInternalThreads;
1024   CAffinityMode AffinityMode;
1025   bool IsGlobalMtMode; // if more than one benchmark encoder threads
1026   #endif
1027 
1028   CMyComPtr<ICompressCoder> _encoder;
1029   CMyComPtr<ICompressFilter> _encoderFilter;
1030   CBenchProgressInfo *progressInfoSpec[2];
1031   CMyComPtr<ICompressProgressInfo> progressInfo[2];
1032   UInt64 NumIterations;
1033 
1034   UInt32 Salt;
1035 
1036   #ifdef USE_ALLOCA
1037   size_t AllocaSize;
1038   #endif
1039 
1040   unsigned KeySize;
1041   Byte _key[32];
1042   Byte _iv[16];
1043 
1044   HRESULT Set_Key_and_IV(ICryptoProperties *cp)
1045   {
1046     RINOK(cp->SetKey(_key, KeySize))
1047     return cp->SetInitVector(_iv, sizeof(_iv));
1048   }
1049 
1050   Byte _psw[16];
1051 
1052   bool CheckCrc_Enc;    /* = 1, if we want to check packed data crcs after each pass
1053                                 used for filter and usual coders */
1054   bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass
1055                                 used only for filter */
1056   E_CheckCrcMode CheckCrcMode_Dec;
1057 
1058   struct CDecoderInfo
1059   {
1060     CEncoderInfo *Encoder;
1061     UInt32 DecoderIndex;
1062     bool CallbackMode;
1063 
1064     #ifdef USE_ALLOCA
1065     size_t AllocaSize;
1066     #endif
1067   };
1068   CDecoderInfo decodersInfo[2];
1069 
1070   CMyComPtr<ICompressCoder> _decoders[2];
1071   CMyComPtr<ICompressFilter> _decoderFilter;
1072 
1073   HRESULT Results[2];
1074   CBenchmarkOutStream *outStreamSpec;
1075   CMyComPtr<ISequentialOutStream> outStream;
1076   IBenchCallback *callback;
1077   IBenchPrintCallback *printCallback;
1078   UInt32 crc;
1079   size_t kBufferSize;
1080   size_t compressedSize;
1081   const Byte *uncompressedDataPtr;
1082 
1083   const Byte *fileData;
1084   CBenchRandomGenerator rg;
1085 
1086   CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
1087 
1088   // CBenchmarkOutStream *propStreamSpec;
1089   Byte propsData[kMaxMethodPropSize];
1090   CBufPtrSeqOutStream *propStreamSpec;
1091   CMyComPtr<ISequentialOutStream> propStream;
1092 
1093   unsigned generateDictBits;
1094   COneMethodInfo _method;
1095 
1096   // for decode
1097   size_t _uncompressedDataSize;
1098 
1099   HRESULT Generate();
1100   HRESULT Encode();
1101   HRESULT Decode(UInt32 decoderIndex);
1102 
1103   CEncoderInfo():
1104     #ifndef Z7_ST
1105     Common(NULL),
1106     IsGlobalMtMode(true),
1107     #endif
1108     Salt(0),
1109     KeySize(0),
1110     CheckCrc_Enc(true),
1111     UseRealData_Enc(true),
1112     CheckCrcMode_Dec(k_CheckCrcMode_Always),
1113     outStreamSpec(NULL),
1114     callback(NULL),
1115     printCallback(NULL),
1116     fileData(NULL),
1117     propStreamSpec(NULL)
1118     {}
1119 
1120   #ifndef Z7_ST
1121 
1122   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
1123   {
1124     HRESULT res;
1125     CEncoderInfo *encoder = (CEncoderInfo *)param;
1126     try
1127     {
1128       #ifdef USE_ALLOCA
1129       alloca(encoder->AllocaSize);
1130       #endif
1131 
1132       res = encoder->Encode();
1133     }
1134     catch(...)
1135     {
1136       res = E_FAIL;
1137     }
1138     encoder->Results[0] = res;
1139     if (res != S_OK)
1140       encoder->progressInfoSpec[0]->Status->SetResult(res);
1141     encoder->ReadyEvent.Set();
1142     return THREAD_FUNC_RET_ZERO;
1143   }
1144 
1145   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
1146   {
1147     CDecoderInfo *decoder = (CDecoderInfo *)param;
1148 
1149     #ifdef USE_ALLOCA
1150     alloca(decoder->AllocaSize);
1151     // printf("\nalloca=%d\n", (unsigned)decoder->AllocaSize);
1152     #endif
1153 
1154     CEncoderInfo *encoder = decoder->Encoder;
1155     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
1156     return THREAD_FUNC_RET_ZERO;
1157   }
1158 
1159   HRESULT CreateEncoderThread()
1160   {
1161     WRes res = 0;
1162     if (!ReadyEvent.IsCreated())
1163       res = ReadyEvent.Create();
1164     if (res == 0)
1165       res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this,
1166           EncoderIndex);
1167     return HRESULT_FROM_WIN32(res);
1168   }
1169 
1170   HRESULT CreateDecoderThread(unsigned index, bool callbackMode
1171       #ifdef USE_ALLOCA
1172       , size_t allocaSize
1173       #endif
1174       )
1175   {
1176     CDecoderInfo &decoder = decodersInfo[index];
1177     decoder.DecoderIndex = index;
1178     decoder.Encoder = this;
1179 
1180     #ifdef USE_ALLOCA
1181     decoder.AllocaSize = allocaSize;
1182     #endif
1183 
1184     decoder.CallbackMode = callbackMode;
1185 
1186     WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder,
1187         // EncoderIndex * NumEncoderInternalThreads + index
1188         EncoderIndex
1189         );
1190 
1191     return HRESULT_FROM_WIN32(res);
1192   }
1193 
1194   #endif
1195 };
1196 
1197 
1198 
1199 
1200 static size_t GetBenchCompressedSize(size_t bufferSize)
1201 {
1202   return kCompressedAdditionalSize + bufferSize + bufferSize / 16;
1203   // kBufferSize / 2;
1204 }
1205 
1206 
/* Prepares the state that the benchmark loops use:
     - selects the input data (uncompressedDataPtr): fileData, a private copy of
       fileData in (rg), or data generated into (rg);
     - allocates the output buffer (outStreamSpec) and, for filters, fills the
       output buffer and (rgCopy) with copies of the input;
     - binds propStreamSpec to the fixed propsData array;
     - sends coder properties to the encoder/filter, stores the written coder
       properties, and sets the password (with one warm-up encoding call).
   Returns S_OK on success, or the first error code. */
1207 HRESULT CEncoderInfo::Generate()
1208 {
1209   const COneMethodInfo &method = _method;
1210 
1211   // we need extra space, if input data is already compressed
1212   const size_t kCompressedBufferSize = _encoderFilter ?
1213       kBufferSize :
1214       GetBenchCompressedSize(kBufferSize);
1215 
  // (kCompressedBufferSize < kBufferSize) is possible only if the size computation has wrapped
1216   if (kCompressedBufferSize < kBufferSize)
1217     return E_FAIL;
1218 
1219   uncompressedDataPtr = fileData;
1220   if (fileData)
1221   {
1222     #if !defined(Z7_ST)
1223     if (IsGlobalMtMode)
1224     {
1225       /* we copy the data to local buffer of thread to eliminate
1226          using of shared buffer by different threads */
1227       ALLOC_WITH_HRESULT(&rg, kBufferSize)
1228       memcpy((Byte *)rg, fileData, kBufferSize);
1229       uncompressedDataPtr = (const Byte *)rg;
1230     }
1231     #endif
1232   }
1233   else
1234   {
    // no file data: generate the input into (rg) and calculate its CRC
1235     ALLOC_WITH_HRESULT(&rg, kBufferSize)
1236     // DWORD ttt = GetTickCount();
1237     if (generateDictBits == 0)
1238       rg.GenerateSimpleRandom(Salt);
1239     else
1240     {
1241       if (generateDictBits >= sizeof(size_t) * 8
1242           && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
1243         return E_INVALIDARG;
1244       rg.GenerateLz(generateDictBits, Salt);
1245       // return E_ABORT; // for debug
1246     }
1247     // printf("\n%d\n            ", GetTickCount() - ttt);
1248 
1249     crc = CrcCalc((const Byte *)rg, rg.Size());
1250     uncompressedDataPtr = (const Byte *)rg;
1251   }
1252 
  // the output stream object is created only once and is reused after that
1253   if (!outStream)
1254   {
1255     outStreamSpec = new CBenchmarkOutStream;
1256     outStream = outStreamSpec;
1257   }
1258 
1259   ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)
1260 
1261   if (_encoderFilter)
1262   {
1263     /* we try to reduce the number of memcpy() in main encoding loop.
1264        so we copy data to temp buffers here */
1265     ALLOC_WITH_HRESULT(&rgCopy, kBufferSize)
1266     memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize);
1267     memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
1268   }
1269 
1270   if (!propStream)
1271   {
1272     propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
1273     propStream = propStreamSpec;
1274   }
1275   // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
1276   // propStreamSpec->Init(true, false);
  // coder properties will be written to the propsData array
1277   propStreamSpec->Init(propsData, sizeof(propsData));
1278 
1279 
  // (coder) is the object that is queried for optional interfaces: the filter, if it exists, or the encoder
1280   CMyComPtr<IUnknown> coder;
1281   if (_encoderFilter)
1282     coder = _encoderFilter;
1283   else
1284     coder = _encoder;
1285   {
1286     CMyComPtr<ICompressSetCoderProperties> scp;
1287     coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1288     if (scp)
1289     {
1290       const UInt64 reduceSize = kBufferSize;
1291 
1292       /* in posix new thread uses same affinity as parent thread,
1293          so we don't need to send affinity to coder in posix */
1294       UInt64 affMask;
1295       #if !defined(Z7_ST) && defined(_WIN32)
1296       {
1297         CCpuSet cpuSet;
1298         affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
1299       }
1300       #else
1301         affMask = 0;
1302       #endif
1303       // affMask <<= 3; // debug line: to test no affinity in coder;
1304       // affMask = 0;
1305 
      // zero mask means "no affinity": NULL is passed instead of the mask pointer
1306       RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
1307     }
1308     else
1309     {
      // the coder can't accept properties, so non-optional properties are an error
1310       if (method.AreThereNonOptionalProps())
1311         return E_INVALIDARG;
1312     }
1313 
1314     CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
1315     coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
1316     if (writeCoderProps)
1317     {
1318       RINOK(writeCoderProps->WriteCoderProperties(propStream))
1319     }
1320 
1321     {
1322       CMyComPtr<ICryptoSetPassword> sp;
1323       coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1324       if (sp)
1325       {
1326         RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
1327 
1328         // we must call encoding one time to calculate password key for key cache.
1329         // it must be after WriteCoderProperties!
1330         Byte temp[16];
1331         memset(temp, 0, sizeof(temp));
1332 
1333         if (_encoderFilter)
1334         {
1335           _encoderFilter->Init();
1336           _encoderFilter->Filter(temp, sizeof(temp));
1337         }
1338         else
1339         {
          // warm-up call: 16 zero bytes are encoded to a stream object that is dropped after the call
1340           CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1341           CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1342           inStreamSpec->Init(temp, sizeof(temp));
1343 
1344           CCrcOutStream *crcStreamSpec = new CCrcOutStream;
1345           CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
1346           crcStreamSpec->Init();
1347 
1348           RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL))
1349         }
1350       }
1351     }
1352   }
1353 
1354   return S_OK;
1355 }
1356 
1357 
1358 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc)
1359 {
1360   while (size != 0)
1361   {
1362     UInt32 cur = crc ? 1 << 17 : 1 << 24;
1363     if (cur > size)
1364       cur = (UInt32)size;
1365     UInt32 processed = filter->Filter(data, cur);
1366     /* if (processed > size) (in AES filter), we must fill last block with zeros.
1367        but it is not important for benchmark. So we just copy that data without filtering.
1368        if (processed == 0) then filter can't process more  */
1369     if (processed > size || processed == 0)
1370       processed = (UInt32)size;
1371     if (crc)
1372       *crc = CrcUpdate(*crc, data, processed);
1373     data += processed;
1374     size -= processed;
1375   }
1376 }
1377 
1378 
/* Encoding benchmark of one encoder object.
   Steps:
     1) Generate() prepares data, buffers and coder properties.
     2) If (Common) is set: Results[0] is set to S_OK, ReadyEvent is signaled,
        and the function waits for Common->StartEvent. If Common->ExitMode is
        set after the wait, the function returns S_OK without encoding.
        If (Common) is not set (or in Z7_ST build), the start time is set here.
     3) The data is encoded NumIterations times. For selected iterations the
        CRC of output is calculated, and it must be the same in all of them.
     4) _encoder and _encoderFilter are released.
   Returns S_OK, or error code (E_FAIL for mismatch of CRC or output size). */
1379 HRESULT CEncoderInfo::Encode()
1380 {
1381   // printf("\nCEncoderInfo::Generate\n");
1382 
1383   RINOK(Generate())
1384 
1385   // printf("\n2222\n");
1386 
1387   #ifndef Z7_ST
1388   if (Common)
1389   {
1390     Results[0] = S_OK;
1391     WRes wres = ReadyEvent.Set();
1392     if (wres == 0)
1393       wres = Common->StartEvent.Lock();
1394     if (wres != 0)
1395       return HRESULT_FROM_WIN32(wres);
1396     if (Common->ExitMode)
1397       return S_OK;
1398   }
1399   else
1400   #endif
1401   {
1402     CBenchProgressInfo *bpi = progressInfoSpec[0];
1403     bpi->SetStartTime();
1404   }
1405 
1406 
1407   CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
1408   bi.UnpackSize = 0;
1409   bi.PackSize = 0;
1410   CMyComPtr<ICryptoProperties> cp;
1411   CMyComPtr<IUnknown> coder;
1412   if (_encoderFilter)
1413     coder = _encoderFilter;
1414   else
1415     coder = _encoder;
1416   coder.QueryInterface(IID_ICryptoProperties, &cp);
1417   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1418   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1419 
1420   if (cp)
1421   {
1422     RINOK(Set_Key_and_IV(cp))
1423   }
1424 
  // a filter works in place, so its output size is equal to the input size
1425   compressedSize = 0;
1426   if (_encoderFilter)
1427     compressedSize = kBufferSize;
1428 
1429   // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
1430   UInt64 prev = 0;
1431 
  /* CRC is calculated in iterations where ((i & mask) == 0):
     in each iteration, if (CheckCrc_Enc), and in each 0x10000-th iteration otherwise.
     (useCrc) is false, if (mask >= NumIterations). */
1432   const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
1433   const bool useCrc = (mask < NumIterations);
1434   bool crcPrev_defined = false;
1435   UInt32 crcPrev = 0;
1436 
1437   bool useRealData_Enc = UseRealData_Enc;
1438   bool data_Was_Changed = false;
1439   if (useRealData_Enc)
1440   {
1441     /* we want memcpy() for each iteration including first iteration.
1442        So results will be equal for different number of iterations */
1443     data_Was_Changed = true;
1444   }
1445 
1446   const UInt64 numIterations = NumIterations;
1447   UInt64 i = numIterations;
1448     // printCallback->NewLine();
1449 
  // (i) counts down: the first iteration has (i == numIterations - 1) after decrement
1450   while (i != 0)
1451   {
1452     i--;
    // the break request is checked after each (1 << 26) bytes of processed input
1453     if (printCallback && bi.UnpackSize - prev >= (1 << 26))
1454     {
1455       prev = bi.UnpackSize;
1456       RINOK(printCallback->CheckBreak())
1457     }
1458 
1459     /*
1460     CBenchInfo info;
1461     progressInfoSpec[0]->SetStartTime();
1462     */
1463 
1464     bool calcCrc = false;
1465     if (useCrc)
1466       calcCrc = (((UInt32)i & mask) == 0);
1467 
1468     if (_encoderFilter)
1469     {
      /* default: filter the scratch copy (rgCopy).
         The output buffer is used in the first iteration, in CRC iterations, and
         in real-data mode; it is restored from the source data, if it was changed. */
1470       Byte *filterData = rgCopy;
1471       if (i == numIterations - 1 || calcCrc || useRealData_Enc)
1472       {
1473         // printf("\nfilterData = (Byte *)*outStreamSpec;\n");
1474         filterData = (Byte *)*outStreamSpec;
1475         if (data_Was_Changed)
1476         {
1477           // printf("\nmemcpy(filterData, uncompressedDataPtr\n");
1478           memcpy(filterData, uncompressedDataPtr, kBufferSize);
1479         }
1480         data_Was_Changed = true;
1481       }
1482       _encoderFilter->Init();
1483       if (calcCrc)
1484       {
1485         // printf("\nInitCrc\n");
1486         outStreamSpec->InitCrc();
1487       }
1488       // printf("\nMy_FilterBench\n");
1489       My_FilterBench(_encoderFilter, filterData, kBufferSize,
1490           calcCrc ? &outStreamSpec->Crc : NULL);
1491     }
1492     else
1493     {
1494       outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
1495       inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
1496       RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
1497       if (!inStreamSpec->WasFinished())
1498         return E_FAIL;
      // the packed size is stored in the first iteration; it must be the same in next iterations
1499       if (compressedSize != outStreamSpec->Pos)
1500       {
1501         if (compressedSize != 0)
1502           return E_FAIL;
1503         compressedSize = outStreamSpec->Pos;
1504       }
1505     }
1506 
1507     // outStreamSpec->Print();
1508 
1509     if (calcCrc)
1510     {
      // CRC of output must be identical in all checked iterations
1511       const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
1512       if (crcPrev_defined && crcPrev != crc2)
1513         return E_FAIL;
1514       crcPrev = crc2;
1515       crcPrev_defined = true;
1516     }
1517 
1518     bi.UnpackSize += kBufferSize;
1519     bi.PackSize += compressedSize;
1520 
1521     /*
1522     {
1523       progressInfoSpec[0]->SetFinishTime(info);
1524       info.UnpackSize = 0;
1525       info.PackSize = 0;
1526       info.NumIterations = 1;
1527 
1528       info.UnpackSize = kBufferSize;
1529       info.PackSize = compressedSize;
1530       // printf("\n%7d\n", encoder.compressedSize);
1531 
1532       RINOK(callback->SetEncodeResult(info, true))
1533       printCallback->NewLine();
1534     }
1535     */
1536 
1537   }
1538 
  // encoder objects are released only if all iterations were finished without error
1539   _encoder.Release();
1540   _encoderFilter.Release();
1541   return S_OK;
1542 }
1543 
1544 
/* Decoding benchmark for the decoder with index (decoderIndex).
   It decodes the data that is stored in the output buffer (outStreamSpec):
   (compressedSize) bytes of input for each of NumIterations iterations.
     - decoder properties are taken from (propsData) / (propStreamSpec);
     - if a decoder filter is used, only (decoderIndex == 0) is allowed;
     - CRC of decoded data is compared with (crc) as defined by CheckCrcMode_Dec.
   Returns S_OK, error code of a called function, E_FAIL for wrong configuration,
   or S_FALSE if the decoded data / sizes don't match the expected values. */
1545 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
1546 {
1547   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1548   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1549   CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
1550   CMyComPtr<IUnknown> coder;
1551   if (_decoderFilter)
1552   {
1553     if (decoderIndex != 0)
1554       return E_FAIL;
1555     coder = _decoderFilter;
1556   }
1557   else
1558     coder = decoder;
1559 
1560   // printf("\ndecoderIndex = %d, stack = %p", decoderIndex, &coder);
1561 
  // if the encoder has written properties, the decoder must be able to accept them
1562   CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
1563   coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
1564   if (!setDecProps && propStreamSpec->GetPos() != 0)
1565     return E_FAIL;
1566 
1567   CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
1568   CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
1569 
1570   CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
1571   pi->BenchInfo.UnpackSize = 0;
1572   pi->BenchInfo.PackSize = 0;
1573 
1574   #ifndef Z7_ST
1575   {
1576     CMyComPtr<ICompressSetCoderMt> setCoderMt;
1577     coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
1578     if (setCoderMt)
1579     {
1580       RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
1581     }
1582   }
1583   #endif
1584 
1585   CMyComPtr<ICompressSetCoderProperties> scp;
1586   coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1587   if (scp)
1588   {
1589     const UInt64 reduceSize = _uncompressedDataSize;
1590     RINOK(_method.SetCoderProps(scp, &reduceSize))
1591   }
1592 
1593   CMyComPtr<ICryptoProperties> cp;
1594   coder.QueryInterface(IID_ICryptoProperties, &cp);
1595 
1596   if (setDecProps)
1597   {
1598     RINOK(setDecProps->SetDecoderProperties2(
1599         /* (const Byte *)*propStreamSpec, */
1600         propsData,
1601         (UInt32)propStreamSpec->GetPos()))
1602   }
1603 
1604   {
1605     CMyComPtr<ICryptoSetPassword> sp;
1606     coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1607     if (sp)
1608     {
1609       RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
1610     }
1611   }
1612 
1613   UInt64 prev = 0;
1614 
1615   if (cp)
1616   {
1617     RINOK(Set_Key_and_IV(cp))
1618   }
1619 
1620   CMyComPtr<ICompressSetFinishMode> setFinishMode;
1621 
1622   if (_decoderFilter)
1623   {
    // (rgCopy) can be used as temp buffer for (compressedSize) bytes in the loop below
1624     if (compressedSize > rgCopy.Size())
1625       return E_FAIL;
1626   }
1627   else
1628   {
1629     decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
1630   }
1631 
1632   const UInt64 numIterations = NumIterations;
1633   const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;
1634 
1635   for (UInt64 i = 0; i < numIterations; i++)
1636   {
    // the break request is checked after each (1 << 26) bytes of unpacked data
1637     if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
1638     {
1639       RINOK(printCallback->CheckBreak())
1640       prev = pi->BenchInfo.UnpackSize;
1641     }
1642 
1643     const UInt64 outSize = kBufferSize;
1644     bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);
1645 
1646     crcOutStreamSpec->Init();
1647 
1648     if (_decoderFilter)
1649     {
      /* the filter works in place.
         k_CheckCrcMode_Always: each iteration filters a fresh copy (in rgCopy) of
         the encoded data, and CRC is checked in each iteration.
         Other modes (except k_CheckCrcMode_Never): CRC is checked only for (i == 0),
         and the output buffer itself is filtered in each iteration. */
1650       Byte *filterData = (Byte *)*outStreamSpec;
1651       if (calcCrc)
1652       {
1653         calcCrc = (i == 0);
1654         if (checkCrcMode == k_CheckCrcMode_Always)
1655         {
1656           calcCrc = true;
1657           memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
1658           filterData = rgCopy;
1659         }
1660       }
1661       _decoderFilter->Init();
1662       My_FilterBench(_decoderFilter, filterData, compressedSize,
1663           calcCrc ? &crcOutStreamSpec->Crc : NULL);
1664     }
1665     else
1666     {
1667       crcOutStreamSpec->CalcCrc = calcCrc;
1668       inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);
1669 
1670       if (setFinishMode)
1671       {
1672         RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
1673       }
1674 
1675       RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))
1676 
1677       if (setFinishMode)
1678       {
        // in finish mode the decoder must consume all input data
1679         if (!inStreamSpec->WasFinished())
1680           return S_FALSE;
1681 
1682         CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
1683         decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);
1684 
1685         if (getInStreamProcessedSize)
1686         {
1687           UInt64 processed;
1688           RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
1689           if (processed != compressedSize)
1690             return S_FALSE;
1691         }
1692       }
1693 
      // the decoder must write exactly (kBufferSize) bytes
1694       if (crcOutStreamSpec->Pos != outSize)
1695         return S_FALSE;
1696     }
1697 
1698     if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
1699       return S_FALSE;
1700 
1701     pi->BenchInfo.UnpackSize += kBufferSize;
1702     pi->BenchInfo.PackSize += compressedSize;
1703   }
1704 
  // (decoder) is reference to _decoders[decoderIndex], so that slot is released here
1705   decoder.Release();
1706   _decoderFilter.Release();
1707   return S_OK;
1708 }
1709 
1710 
// constant 4096 (1 << 12).
// NOTE(review): it is not referenced in this part of the file; presumably it is
// the upper limit for the number of benchmark threads - confirm at the use sites.
1711 static const UInt32 kNumThreadsMax = (1 << 12);
1712 
1713 struct CBenchEncoders
1714 {
1715   CEncoderInfo *encoders;
1716   CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
1717   ~CBenchEncoders() { delete []encoders; }
1718 };
1719 
1720 
/* Returns the number of iterations for a benchmark pass:
     complexInCommands / numCommands,
   where (numCommands) is raised to at least 16, and the result is at least 1. */
static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
{
  const UInt64 kNumCommandsMin = (UInt64)1 << 4;
  const UInt64 divisor = (numCommands < kNumCommandsMin) ? kNumCommandsMin : numCommands;
  const UInt64 num = complexInCommands / divisor;
  if (num == 0)
    return 1;
  return num;
}
1728 
1729 
1730 
1731 #ifndef Z7_ST
1732 
1733 // ---------- CBenchThreadsFlusher ----------
1734 
1735 struct CBenchThreadsFlusher
1736 {
1737   CBenchEncoders *EncodersSpec;
1738   CBenchSyncCommon Common;
1739   unsigned NumThreads;
1740   bool NeedClose;
1741 
1742   CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1743 
1744   ~CBenchThreadsFlusher()
1745   {
1746     StartAndWait(true);
1747   }
1748 
1749   WRes StartAndWait(bool exitMode = false);
1750 };
1751 
1752 
1753 WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1754 {
1755   if (!NeedClose)
1756     return 0;
1757 
1758   Common.ExitMode = exitMode;
1759   WRes res = Common.StartEvent.Set();
1760 
1761   for (unsigned i = 0; i < NumThreads; i++)
1762   {
1763     NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1764     if (t.IsCreated())
1765     {
1766       WRes res2 = t.Wait_Close();
1767       if (res == 0)
1768         res = res2;
1769     }
1770   }
1771   NeedClose = false;
1772   return res;
1773 }
1774 
1775 #endif // Z7_ST
1776 
1777 
1778 
/* Fills (size) bytes of (data) with the low bytes of consecutive values:
   startValue, startValue + 1, startValue + 2, ... */
static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue)
{
  while (size != 0)
  {
    *data++ = (Byte)startValue++;
    size--;
  }
}
1787 
1788 
1789 
/* Benchmark of one method: encoding pass and then decoding pass.
     complexInCommands    : budget of commands; it defines the number of iterations.
     oldLzmaBenchMode, numThreads, affinityMode : (non-Z7_ST builds) threading options.
     method2              : method name and properties.
     uncompressedDataSize : size of input data for each encoder.
     fileData             : input data, or NULL if data must be generated.
     generateDictBits     : parameter for data generation (see CEncoderInfo::Generate()).
     printCallback        : optional; it is used for CheckBreak().
     callback             : it receives encode and decode results; it is called
                            without NULL check, so it must be non-NULL.
     benchProps           : complexity parameters of the method.
   Returns S_OK, E_NOTIMPL if method or coder is not available,
   E_INVALIDARG if the method has more than one stream, or other error code. */
1790 static HRESULT MethodBench(
1791     DECL_EXTERNAL_CODECS_LOC_VARS
1792     UInt64 complexInCommands,
1793     #ifndef Z7_ST
1794       bool oldLzmaBenchMode,
1795       UInt32 numThreads,
1796       const CAffinityMode *affinityMode,
1797     #endif
1798     const COneMethodInfo &method2,
1799     size_t uncompressedDataSize,
1800     const Byte *fileData,
1801     unsigned generateDictBits,
1802 
1803     IBenchPrintCallback *printCallback,
1804     IBenchCallback *callback,
1805     CBenchProps *benchProps)
1806 {
  // local copy: the thread property can be added to it below
1807   COneMethodInfo method = method2;
1808   UInt64 methodId;
1809   UInt32 numStreams;
1810   bool isFilter;
1811   const int codecIndex = FindMethod_Index(
1812       EXTERNAL_CODECS_LOC_VARS
1813       method.MethodName, true,
1814       methodId, numStreams, isFilter);
1815   if (codecIndex < 0)
1816     return E_NOTIMPL;
1817   if (numStreams != 1)
1818     return E_INVALIDARG;
1819 
1820   UInt32 numEncoderThreads = 1;
1821   UInt32 numSubDecoderThreads = 1;
1822 
1823   #ifndef Z7_ST
1824     numEncoderThreads = numThreads;
1825 
    /* old LZMA benchmark mode: if LZMA encoder uses more than one thread,
       the number of encoders is halved (rounded up), and each encoder
       gets two decoders in the decoding pass */
1826     if (oldLzmaBenchMode)
1827     if (methodId == k_LZMA)
1828     {
1829       if (numThreads == 1 && method.Get_NumThreads() < 0)
1830         method.AddProp_NumThreads(1);
1831       const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
1832       if (numThreads > 1 && numLzmaThreads > 1)
1833       {
1834         numEncoderThreads = (numThreads + 1) / 2; // 20.03
1835         numSubDecoderThreads = 2;
1836       }
1837     }
1838 
1839   const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();
1840 
1841   #endif
1842 
1843   CBenchEncoders encodersSpec(numEncoderThreads);
1844   CEncoderInfo *encoders = encodersSpec.encoders;
1845 
1846   UInt32 i;
1847 
  // ---------- create coders and set check modes for each encoder ----------
1848   for (i = 0; i < numEncoderThreads; i++)
1849   {
1850     CEncoderInfo &encoder = encoders[i];
1851     encoder.callback = (i == 0) ? callback : NULL;
1852     encoder.printCallback = printCallback;
1853 
1854     #ifndef Z7_ST
1855     encoder.EncoderIndex = i;
1856     encoder.NumEncoderInternalThreads = numSubDecoderThreads;
1857     encoder.AffinityMode = *affinityMode;
1858 
1859     /*
1860     if (numSubDecoderThreads > 1)
1861     if (encoder.AffinityMode.NeedAffinity()
1862         && encoder.AffinityMode.NumBundleThreads == 1)
1863     {
1864       // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
1865       if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
1866         encoder.AffinityMode.NumBundleThreads *= 2;
1867     }
1868     */
1869 
1870     #endif
1871 
1872     {
1873       CCreatedCoder cod;
1874       RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
1875       encoder._encoder = cod.Coder;
1876       if (!encoder._encoder && !encoder._encoderFilter)
1877         return E_NOTIMPL;
1878     }
1879 
    // fixed pseudo-random values for IV, key and password
1880     SetPseudoRand(encoder._iv,  sizeof(encoder._iv), 17);
1881     SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
1882     SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);
1883 
1884     for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1885     {
1886       CCreatedCoder cod;
1887       CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
1888       RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
1889       decoder = cod.Coder;
1890       if (!encoder._decoderFilter && !decoder)
1891         return E_NOTIMPL;
1892     }
1893 
    // real data and CRC check in each encoding iteration are used, if (EncComplex > 30)
1894     encoder.UseRealData_Enc =
1895     encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;
1896 
1897     encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
1898     if (benchProps->DecComplexCompr +
1899         benchProps->DecComplexUnc <= 30)
1900       encoder.CheckCrcMode_Dec =
1901           k_CheckCrcMode_FirstPass; // for filters
1902           // k_CheckCrcMode_Never; // for debug
1903           // k_CheckCrcMode_Always; // for debug
    // file data overrides the selection above
1904     if (fileData)
1905     {
1906       encoder.UseRealData_Enc = true;
1907       encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
1908     }
1909   }
1910 
  // for generated data (fileData == NULL), each encoder calculates its own (crc) in Generate()
1911   UInt32 crc = 0;
1912   if (fileData)
1913     crc = CrcCalc(fileData, uncompressedDataSize);
1914 
1915   for (i = 0; i < numEncoderThreads; i++)
1916   {
1917     CEncoderInfo &encoder = encoders[i];
1918     encoder._method = method;
1919     encoder.generateDictBits = generateDictBits;
1920     encoder._uncompressedDataSize = uncompressedDataSize;
1921     encoder.kBufferSize = uncompressedDataSize;
1922     encoder.fileData = fileData;
1923     encoder.crc = crc;
1924   }
1925 
1926   CBenchProgressStatus status;
1927   status.Res = S_OK;
1928   status.EncodeMode = true;
1929 
1930   #ifndef Z7_ST
  /* (encoderFlusher) is declared after (encodersSpec), so it is destroyed first:
     its destructor starts created encoder threads in exit mode and waits for them,
     if this function returns before StartAndWait() was called. */
1931   CBenchThreadsFlusher encoderFlusher;
1932   if (mtEncMode)
1933   {
1934     WRes wres = encoderFlusher.Common.StartEvent.Create();
1935     if (wres != 0)
1936       return HRESULT_FROM_WIN32(wres);
1937     encoderFlusher.NumThreads = numEncoderThreads;
1938     encoderFlusher.EncodersSpec = &encodersSpec;
1939     encoderFlusher.NeedClose = true;
1940   }
1941   #endif
1942 
  // ---------- set iterations, salt and progress objects; create encoder threads ----------
1943   for (i = 0; i < numEncoderThreads; i++)
1944   {
1945     CEncoderInfo &encoder = encoders[i];
1946     encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
1947     // encoder.NumIterations = 3;
1948     {
1949 #if 0
1950       #define kCrcPoly 0xEDB88320
1951       UInt32 r = i;
1952       unsigned num = numEncoderThreads < 256 ? 8 : 16;
1953       do
1954         r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
1955       while (--num);
1956       encoder.Salt = r;
1957 #else
      // salt is derived from CRC table items selected by the two low bytes of the encoder index
1958       UInt32 salt0 = g_CrcTable[(Byte)i];
1959       UInt32 salt1 = g_CrcTable[(Byte)(i >> 8)];
1960       encoder.Salt = salt0 ^ (salt1 << 3);
1961 #endif
1962     }
1963 
1964     // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
1965     // printf("\n encoder index = %d, Salt = %8x\n", i, encoder.Salt);
1966 
1967     encoder.KeySize = benchProps->KeySize;
1968 
    // two progress objects for each encoder; all of them share (status)
1969     for (int j = 0; j < 2; j++)
1970     {
1971       CBenchProgressInfo *spec = new CBenchProgressInfo;
1972       encoder.progressInfoSpec[j] = spec;
1973       encoder.progressInfo[j] = spec;
1974       spec->Status = &status;
1975     }
1976 
    // only the first encoder reports progress to (callback)
1977     if (i == 0)
1978     {
1979       CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
1980       bpi->Callback = callback;
1981       bpi->BenchInfo.NumIterations = numEncoderThreads;
1982     }
1983 
1984     #ifndef Z7_ST
1985     if (mtEncMode)
1986     {
1987       #ifdef USE_ALLOCA
1988       encoder.AllocaSize = BENCH_ALLOCA_VALUE(i);
1989       #endif
1990 
1991       encoder.Common = &encoderFlusher.Common;
1992       encoder.IsGlobalMtMode = numEncoderThreads > 1;
1993       RINOK(encoder.CreateEncoderThread())
1994     }
1995     #endif
1996   }
1997 
1998   if (printCallback)
1999   {
2000     RINOK(printCallback->CheckBreak())
2001   }
2002 
2003   #ifndef Z7_ST
2004   if (mtEncMode)
2005   {
    // wait until each encoder thread signals ReadyEvent, and check its result
2006     for (i = 0; i < numEncoderThreads; i++)
2007     {
2008       CEncoderInfo &encoder = encoders[i];
2009       const WRes wres = encoder.ReadyEvent.Lock();
2010       if (wres != 0)
2011         return HRESULT_FROM_WIN32(wres);
2012       RINOK(encoder.Results[0])
2013     }
2014 
    // the time measurement is started after all encoders are ready
2015     CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
2016     bpi->SetStartTime();
2017 
2018     const WRes wres = encoderFlusher.StartAndWait();
2019     if (status.Res == 0 && wres != 0)
2020       return HRESULT_FROM_WIN32(wres);
2021   }
2022   else
2023   #endif
2024   {
2025     RINOK(encoders[0].Encode())
2026   }
2027 
2028   RINOK(status.Res)
2029 
2030   CBenchInfo info;
2031 
  // total sizes of single pass for all encoders are reported as the encode result
2032   encoders[0].progressInfoSpec[0]->SetFinishTime(info);
2033   info.UnpackSize = 0;
2034   info.PackSize = 0;
2035   info.NumIterations = encoders[0].NumIterations;
2036 
2037   for (i = 0; i < numEncoderThreads; i++)
2038   {
2039     const CEncoderInfo &encoder = encoders[i];
2040     info.UnpackSize += encoder.kBufferSize;
2041     info.PackSize += encoder.compressedSize;
2042     // printf("\n%7d\n", encoder.compressedSize);
2043   }
2044 
2045   RINOK(callback->SetEncodeResult(info, true))
2046 
2047 
2048 
2049 
2050   // ---------- Decode ----------
2051 
2052   status.Res = S_OK;
2053   status.EncodeMode = false;
2054 
2055   const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
2056   #ifndef Z7_ST
2057   const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
2058   #endif
2059 
2060   for (i = 0; i < numEncoderThreads; i++)
2061   {
2062     CEncoderInfo &encoder = encoders[i];
2063 
2064     /*
2065     #ifndef Z7_ST
2066     // encoder.affinityMode = *affinityMode;
2067     if (encoder.NumEncoderInternalThreads != 1)
2068       encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
2069     #endif
2070     */
2071 
2072 
    // the number of decoding iterations is calculated from sizes of the first encoder; other encoders use the same number
2073     if (i == 0)
2074     {
2075       encoder.NumIterations = GetNumIterations(
2076           benchProps->GetNumCommands_Dec(
2077               encoder.compressedSize,
2078               encoder.kBufferSize),
2079           complexInCommands);
2080       CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
2081       bpi->Callback = callback;
2082       bpi->BenchInfo.NumIterations = numDecoderThreads;
2083       bpi->SetStartTime();
2084     }
2085     else
2086       encoder.NumIterations = encoders[0].NumIterations;
2087 
2088     #ifndef Z7_ST
2089     {
2090       const int numSubThreads = method.Get_NumThreads();
2091       encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
2092     }
2093     if (mtDecoderMode)
2094     {
2095       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2096       {
        // callback mode is enabled only for the first decoder of the first encoder
2097         const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
2098             #ifdef USE_ALLOCA
2099             , BENCH_ALLOCA_VALUE(i * numSubDecoderThreads + j)
2100             #endif
2101             );
2102         RINOK(res)
2103       }
2104     }
2105     else
2106     #endif
2107     {
2108       RINOK(encoder.Decode(0))
2109     }
2110   }
2111 
2112   #ifndef Z7_ST
2113   if (mtDecoderMode)
2114   {
    // wait for all decoder threads; the first wait error and the first thread result error are kept
2115     WRes wres = 0;
2116     HRESULT res = S_OK;
2117     for (i = 0; i < numEncoderThreads; i++)
2118       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2119       {
2120         CEncoderInfo &encoder = encoders[i];
2121         const WRes wres2 = encoder.thread[j].
2122             // Wait(); // later we can get thread times from thread in UNDER_CE
2123             Wait_Close();
2124         if (wres == 0 && wres2 != 0)
2125           wres = wres2;
2126         const HRESULT res2 = encoder.Results[j];
2127         if (res == 0 && res2 != 0)
2128           res = res2;
2129       }
2130     if (wres != 0)
2131       return HRESULT_FROM_WIN32(wres);
2132     RINOK(res)
2133   }
2134   #endif // Z7_ST
2135 
2136   RINOK(status.Res)
2137   encoders[0].progressInfoSpec[0]->SetFinishTime(info);
2138 
2139   /*
2140   #ifndef Z7_ST
2141   #ifdef UNDER_CE
2142   if (mtDecoderMode)
2143     for (i = 0; i < numEncoderThreads; i++)
2144       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2145       {
2146         FILETIME creationTime, exitTime, kernelTime, userTime;
2147         if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
2148           info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
2149       }
2150   #endif
2151   #endif
2152   */
2153 
  // sizes are summed per encoder; the decoders of one encoder are counted via NumIterations
2154   info.UnpackSize = 0;
2155   info.PackSize = 0;
2156   info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
2157 
2158   for (i = 0; i < numEncoderThreads; i++)
2159   {
2160     const CEncoderInfo &encoder = encoders[i];
2161     info.UnpackSize += encoder.kBufferSize;
2162     info.PackSize += encoder.compressedSize;
2163   }
2164 
2165   // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
2166   RINOK(callback->SetDecodeResult(info, true))
2167 
2168   return S_OK;
2169 }
2170 
2171 
2172 
/* Returns 2^dictSizeLog as 64-bit value.
   (dictSizeLog) must be smaller than 64. */
static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
{
  const UInt64 one = 1;
  return one << dictSizeLog;
}
2183 
2184 
// it's limit of current LZMA implementation that can be changed later
#define kLzmaMaxDictSize ((UInt32)15 << 28)

/* Returns the estimated memory usage (in bytes) of LZMA encoder.
     multiThread : adds the additional buffers of multithreaded mode.
     btMode      : non-zero value doubles the (son) part of the estimation.
     dict        : dictionary size; it is limited to the range [1, kLzmaMaxDictSize].
   The result is the sum of the hash and son parts (4 bytes per item),
   the window block, and fixed reserves. */
static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
{
  if (dict == 0)
    dict = 1;
  else if (dict > kLzmaMaxDictSize)
    dict = kLzmaMaxDictSize;

  // hash part: bits of (dict - 1) are smeared with shifts 1, 2, 4, 8
  UInt32 hashSize = (UInt32)dict - 1;
  for (unsigned shift = 1; shift <= 8; shift <<= 1)
    hashSize |= (hashSize >> shift);
  hashSize = (hashSize >> 1) | 0xFFFF;
  if (hashSize > (1 << 24))
    hashSize >>= 1;
  hashSize += 1 + (1 << 16);

  // window block: it is enlarged by 1/2 (or by 1/4 for big blocks) and limited by kBlockSizeMax
  const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
  UInt64 blockSize = dict + (1 << 16);
  if (multiThread)
    blockSize += (1 << 20);
  const unsigned growShift = (blockSize < ((UInt32)1 << 30)) ? 1 : 2;
  blockSize += (blockSize >> growShift);
  if (blockSize >= kBlockSizeMax)
    blockSize = kBlockSizeMax;

  const UInt64 son = btMode ? dict * 2 : dict;

  UInt64 total = (hashSize + son) * 4 + blockSize + (1 << 20);
  if (multiThread)
    total += (6 << 20);
  return total;
}
2223 
2224 
2225 UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
2226 {
2227   const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
2228   const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
2229   if (level < 0)
2230     level = 5;
2231   const int algo = (level < 5 ? 0 : 1);
2232   const int btMode = (algo == 0 ? 0 : 1);
2233 
2234   UInt32 numBigThreads = numThreads;
2235   const bool lzmaMt = (totalBench || (numThreads > 1 && btMode));
2236   if (btMode)
2237   {
2238     if (!totalBench && lzmaMt)
2239       numBigThreads /= 2;
2240   }
2241   return ((UInt64)kBufferSize + kCompressedBufferSize +
2242     GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads;
2243 }
2244 
/* Returns the estimated memory usage (in bytes) of hash benchmark:
   (dictionary + 32 KiB) for each thread, and 2 MiB of common reserve. */
static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary)
{
  const UInt64 usagePerThread = dictionary + (1 << 15);
  const UInt64 commonReserve = (2 << 20);
  return usagePerThread * numThreads + commonReserve;
}
2250 
2251 
2252 // ---------- CRC and HASH ----------
2253 
2254 struct CCrcInfo_Base
2255 {
2256   CMidAlignedBuffer Buffer;
2257   const Byte *Data;
2258   size_t Size;
2259   bool CreateLocalBuf;
2260   UInt32 CheckSum_Res;
2261 
2262   CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {}
2263 
2264   HRESULT Generate(const Byte *data, size_t size);
2265   HRESULT CrcProcess(UInt64 numIterations,
2266       const UInt32 *checkSum, IHasher *hf,
2267       IBenchPrintCallback *callback);
2268 };
2269 
2270 
2271 // for debug: define it to test hash calling with unaligned data
2272 // #define Z7_BENCH_HASH_ALIGN_BUF_OFFSET  3
2273 
2274 HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size)
2275 {
2276   Size = size;
2277   Data = data;
2278   if (!data || CreateLocalBuf)
2279   {
2280     Byte *buf;
2281     const size_t size2 = (size + k_RandBuf_AlignMask) & ~(size_t)k_RandBuf_AlignMask;
2282     if (size2 < size)
2283       return E_OUTOFMEMORY;
2284 #ifdef Z7_BENCH_HASH_ALIGN_BUF_OFFSET
2285     ALLOC_WITH_HRESULT(&Buffer, size2 + Z7_BENCH_HASH_ALIGN_BUF_OFFSET)
2286     buf = Buffer + Z7_BENCH_HASH_ALIGN_BUF_OFFSET;
2287 #else
2288     ALLOC_WITH_HRESULT(&Buffer, size2)
2289     buf = Buffer;
2290 #endif
2291     Data = buf;
2292     if (!data)
2293       RandGen_BufAfterPad(buf, size);
2294     else if (size != 0) // (CreateLocalBuf == true)
2295       memcpy(buf, data, size);
2296   }
2297   return S_OK;
2298 }
2299 
2300 
// HashUpdate(hf, data, size) feeds (size) bytes at (data) to hasher (hf).
// The active (#if 1) variant is a macro that forwards directly to hf->Update().
#if 1
#define HashUpdate(hf, data, size)  hf->Update(data, size)
#else
// for debug:
// this variant splits the input into smaller pieces (at least 1 byte each),
// so hf->Update() is called many times for one buffer.
static void HashUpdate(IHasher *hf, const void *data, UInt32 size)
{
  for (;;)
  {
    if (size == 0)
      return;
    // piece size derived from (size) itself, so the sequence of sizes is deterministic
    UInt32 size2 = (size * 0x85EBCA87) % size / 8;
    // UInt32 size2 = size / 2;
    if (size2 == 0)
      size2 = 1;
    hf->Update(data, size2);
    data = (const void *)((const Byte *)data + size2);
    size -= size2;
  }
}
#endif
2321 
2322 
2323 HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations,
2324     const UInt32 *checkSum, IHasher *hf,
2325     IBenchPrintCallback *callback)
2326 {
2327   MY_ALIGN(16)
2328   UInt32 hash32[64 / 4];
2329   memset(hash32, 0, sizeof(hash32));
2330 
2331   CheckSum_Res = 0;
2332 
2333   const UInt32 hashSize = hf->GetDigestSize();
2334   if (hashSize > sizeof(hash32))
2335     return S_FALSE;
2336 
2337   const Byte *buf = Data;
2338   const size_t size = Size;
2339   UInt32 checkSum_Prev = 0;
2340 
2341   UInt64 prev = 0;
2342   UInt64 cur = 0;
2343 
2344   do
2345   {
2346     hf->Init();
2347     size_t pos = 0;
2348     do
2349     {
2350       const size_t rem = size - pos;
2351       const UInt32 kStep = ((UInt32)1 << 31);
2352       const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep;
2353       HashUpdate(hf, buf + pos, curSize);
2354       pos += curSize;
2355     }
2356     while (pos != size);
2357 
2358     hf->Final((Byte *)(void *)hash32);
2359     UInt32 sum = 0;
2360     for (UInt32 j = 0; j < hashSize; j += 4)
2361     {
2362       sum = rotlFixed(sum, 11);
2363       sum += GetUi32((const Byte *)(const void *)hash32 + j);
2364     }
2365     if (checkSum)
2366     {
2367       if (sum != *checkSum)
2368         return S_FALSE;
2369     }
2370     else
2371     {
2372       checkSum_Prev = sum;
2373       checkSum = &checkSum_Prev;
2374     }
2375     if (callback)
2376     {
2377       cur += size;
2378       if (cur - prev >= ((UInt32)1 << 30))
2379       {
2380         prev = cur;
2381         RINOK(callback->CheckBreak())
2382       }
2383     }
2384   }
2385   while (--numIterations);
2386 
2387   CheckSum_Res = checkSum_Prev;
2388   return S_OK;
2389 }
2390 
// Value that is mixed into the CountCpuFreq() computation.
extern
UInt32 g_BenchCpuFreqTemp; // we need a non-static variable to disable compiler optimization
UInt32 g_BenchCpuFreqTemp = 1;
2394 
// YY1 is 2 dependent operations on (sum): one addition and one xor.
// Each next macro repeats the previous one 4 times:
//   YY3 = 4 * YY1, YY5 = 16 * YY1, YY7 = 64 * YY1 = 128 operations.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// number of operations in one YY7 expansion (one iteration of CountCpuFreq() loop)
static const UInt32 kNumFreqCommands = 128;

EXTERN_C_BEGIN

// Executes (num) iterations of YY7 sequence, starting from (sum),
// and returns the final value of (sum).
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}

EXTERN_C_END
2413 
2414 
2415 #ifndef Z7_ST
2416 
2417 struct CBaseThreadInfo
2418 {
2419   NWindows::CThread Thread;
2420   IBenchPrintCallback *Callback;
2421   HRESULT CallbackRes;
2422 
2423   WRes Wait_If_Created()
2424   {
2425     if (!Thread.IsCreated())
2426       return 0;
2427     return Thread.Wait_Close();
2428   }
2429 };
2430 
// Per-thread state of CPU frequency test; it is processed by FreqThreadFunction().
struct CFreqInfo: public CBaseThreadInfo
{
  UInt32 ValRes;          // out: final value computed by the thread
  UInt32 Size;            // in: (num) argument for each CountCpuFreq() call
  UInt64 NumIterations;   // in: number of CountCpuFreq() calls
};
2437 
2438 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
2439 {
2440   CFreqInfo *p = (CFreqInfo *)param;
2441 
2442   UInt32 sum = g_BenchCpuFreqTemp;
2443   for (UInt64 k = p->NumIterations; k > 0; k--)
2444   {
2445     if (p->Callback)
2446     {
2447       p->CallbackRes = p->Callback->CheckBreak();
2448       if (p->CallbackRes != S_OK)
2449         break;
2450     }
2451     sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
2452   }
2453   p->ValRes = sum;
2454   return THREAD_FUNC_RET_ZERO;
2455 }
2456 
2457 struct CFreqThreads
2458 {
2459   CFreqInfo *Items;
2460   UInt32 NumThreads;
2461 
2462   CFreqThreads(): Items(NULL), NumThreads(0) {}
2463 
2464   WRes WaitAll()
2465   {
2466     WRes wres = 0;
2467     for (UInt32 i = 0; i < NumThreads; i++)
2468     {
2469       WRes wres2 = Items[i].Wait_If_Created();
2470       if (wres == 0 && wres2 != 0)
2471         wres = wres2;
2472     }
2473     NumThreads = 0;
2474     return wres;
2475   }
2476 
2477   ~CFreqThreads()
2478   {
2479     WaitAll();
2480     delete []Items;
2481   }
2482 };
2483 
2484 
2485 static THREAD_FUNC_DECL CrcThreadFunction(void *param);
2486 
2487 struct CCrcInfo: public CBaseThreadInfo
2488 {
2489   const Byte *Data;
2490   size_t Size;
2491   UInt64 NumIterations;
2492   bool CheckSumDefined;
2493   UInt32 CheckSum;
2494   CMyComPtr<IHasher> Hasher;
2495   HRESULT Res;
2496   UInt32 CheckSum_Res;
2497 
2498   #ifndef Z7_ST
2499   NSynchronization::CManualResetEvent ReadyEvent;
2500   UInt32 ThreadIndex;
2501   CBenchSyncCommon *Common;
2502   CAffinityMode AffinityMode;
2503   #endif
2504 
2505   // we want to call CCrcInfo_Base::Buffer.Free() in main thread.
2506   // so we uses non-local CCrcInfo_Base.
2507   CCrcInfo_Base crcib;
2508 
2509   HRESULT CreateThread()
2510   {
2511     WRes res = 0;
2512     if (!ReadyEvent.IsCreated())
2513       res = ReadyEvent.Create();
2514     if (res == 0)
2515       res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
2516           ThreadIndex);
2517     return HRESULT_FROM_WIN32(res);
2518   }
2519 
2520   #ifdef USE_ALLOCA
2521   size_t AllocaSize;
2522   #endif
2523 
2524   void Process();
2525 
2526   CCrcInfo(): Res(E_FAIL) {}
2527 };
2528 
2529 static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
2530 // static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test
2531 
2532 void CCrcInfo::Process()
2533 {
2534   crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
2535   // we can use additional Generate() passes to reduce some time effects for new page allocation
2536   // for (unsigned y = 0; y < 10; y++)
2537   Res = crcib.Generate(Data, Size);
2538 
2539   // if (Common)
2540   {
2541     WRes wres = ReadyEvent.Set();
2542     if (wres != 0)
2543     {
2544       if (Res == 0)
2545         Res = HRESULT_FROM_WIN32(wres);
2546       return;
2547     }
2548     if (Res != 0)
2549       return;
2550 
2551     wres = Common->StartEvent.Lock();
2552 
2553     if (wres != 0)
2554     {
2555       Res = HRESULT_FROM_WIN32(wres);
2556       return;
2557     }
2558     if (Common->ExitMode)
2559       return;
2560   }
2561 
2562   Res = crcib.CrcProcess(NumIterations,
2563       CheckSumDefined ? &CheckSum : NULL, Hasher,
2564       Callback);
2565   CheckSum_Res = crcib.CheckSum_Res;
2566   /*
2567   We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
2568   to time of benchmark. So we don't free Buffer here
2569   */
2570   // crcib.Buffer.Free();
2571 }
2572 
2573 
2574 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
2575 {
2576   CCrcInfo *p = (CCrcInfo *)param;
2577 
2578   #ifdef USE_ALLOCA
2579   alloca(p->AllocaSize);
2580   #endif
2581   p->Process();
2582   return THREAD_FUNC_RET_ZERO;
2583 }
2584 
2585 
// Owner of CCrcInfo array for multithreaded hash benchmark.
// Common.StartEvent is the shared event that releases waiting threads.
struct CCrcThreads
{
  CCrcInfo *Items;
  unsigned NumThreads;      // number of items that StartAndWait() must wait for
  CBenchSyncCommon Common;
  bool NeedClose;           // true: StartAndWait() still has work to do

  CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {}

  // Sets StartEvent and waits for threads; see the definition below.
  WRes StartAndWait(bool exitMode = false);

  // Threads that still wait for StartEvent are released in exit mode,
  // before the array of items is deleted.
  ~CCrcThreads()
  {
    StartAndWait(true);
    delete []Items;
  }
};
2603 
2604 
2605 WRes CCrcThreads::StartAndWait(bool exitMode)
2606 {
2607   if (!NeedClose)
2608     return 0;
2609 
2610   Common.ExitMode = exitMode;
2611   WRes wres = Common.StartEvent.Set();
2612 
2613   for (unsigned i = 0; i < NumThreads; i++)
2614   {
2615     WRes wres2 = Items[i].Wait_If_Created();
2616     if (wres == 0 && wres2 != 0)
2617       wres = wres2;
2618   }
2619   NumThreads = 0;
2620   NeedClose = false;
2621   return wres;
2622 }
2623 
2624 #endif
2625 
2626 
2627 /*
2628 static UInt32 CrcCalc1(const Byte *buf, size_t size)
2629 {
2630   UInt32 crc = CRC_INIT_VAL;
2631   for (size_t i = 0; i < size; i++)
2632     crc = CRC_UPDATE_BYTE(crc, buf[i]);
2633   return CRC_GET_DIGEST(crc);
2634 }
2635 */
2636 
2637 /*
2638 static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
2639 {
2640   RandGen(buf, size, RG);
2641   return CrcCalc1(buf, size);
2642 }
2643 */
2644 
2645 static bool CrcInternalTest()
2646 {
2647   CAlignedBuffer buffer;
2648   const size_t kBufSize = 1 << 11;
2649   const size_t kCheckSize = 1 << 6;
2650   buffer.Alloc(kBufSize);
2651   if (!buffer.IsAllocated())
2652     return false;
2653   Byte *buf = (Byte *)buffer;
2654   RandGen_BufAfterPad(buf, kBufSize);
2655   UInt32 sum = 0;
2656   for (size_t i = 0; i < kBufSize - kCheckSize * 2; i += kCheckSize - 1)
2657     for (size_t j = 0; j < kCheckSize; j++)
2658     {
2659       sum = rotlFixed(sum, 11);
2660       sum += CrcCalc(buf + i + j, j);
2661     }
2662   return sum == 0x28462c7c;
2663 }
2664 
// Description of one codec test of total benchmark (item of g_Bench[]).
struct CBenchMethod
{
  unsigned Weight;        // TotalBench() uses it as encode and decode weight of results
  unsigned DictBits;      // it is passed to MethodBench(); 0 selects kFilterUnpackSize
                          // in TotalBench(), if unpack size is not forced
  Int32 EncComplex;       // it is copied to CBenchProps::EncComplex
  Int32 DecComplexCompr;  // it is copied to CBenchProps::DecComplexCompr
  Int32 DecComplexUnc;    // it is copied to CBenchProps::DecComplexUnc
  const char *Name;       // method string; it is matched with method mask and parsed as method
  // unsigned KeySize;
};
2675 
// CMPLX(x) wraps some complexity values in the tables below.
// It multiplies the value by 1000, if USE_SW_CMPLX is defined,
// and keeps the value unchanged otherwise.
// #define USE_SW_CMPLX

#ifdef USE_SW_CMPLX
#define CMPLX(x) ((x) * 1000)
#else
#define CMPLX(x) (x)
#endif
2683 
// Table of codec tests for total benchmark; TotalBench() iterates over it.
// Fields of each item (see CBenchMethod):
//   { Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name }
// Commented items are disabled tests.
static const CBenchMethod g_Bench[] =
{
  // { 40, 17,  357,  145,   20, "LZMA:x1" },
  // { 20, 18,  360,  145,   20, "LZMA2:x1:mt2" },

  { 20, 18,  360,  145,   20, "LZMA:x1" },
  { 20, 22,  600,  145,   20, "LZMA:x3" },

  { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },

  { 10, 16,  124,   40,   14, "Deflate:x1" },
  { 20, 16,  376,   40,   14, "Deflate:x5" },
  { 10, 16, 1082,   40,   14, "Deflate:x7" },
  { 10, 17,  422,   40,   14, "Deflate64:x5" },

  { 10, 15,  590,   69,   69, "BZip2:x1" },
  { 20, 19,  815,  122,  122, "BZip2:x5" },
  { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
  { 10, 19, 2530,  122,  122, "BZip2:x7" },

  // { 10, 18, 1010,    0, 1150, "PPMDZip:x1" },
  { 10, 18, 1010,    0, 1150, "PPMD:x1" },
  // { 10, 22, 1655,    0, 1830, "PPMDZip:x5" },
  { 10, 22, 1655,    0, 1830, "PPMD:x5" },

  // {  2,  0,  -16,    0,  -16, "Swap2" },
  {  2,  0,  -16,    0,  -16, "Swap4" },

  // {  2,  0,    3,    0,    4, "Delta:1" },
  // {  2,  0,    3,    0,    4, "Delta:2" },
  // {  2,  0,    3,    0,    4, "Delta:3" },
  {  2,  0,    3,    0,    4, "Delta:4" },
  // {  2,  0,    3,    0,    4, "Delta:8" },
  // {  2,  0,    3,    0,    4, "Delta:32" },

  {  2,  0,    2,    0,    2, "BCJ" },
  {  2,  0,    1,    0,    1, "ARM64" },
  {  2,  0,    1,    0,    1, "RISCV" },

  // { 10,  0,   18,    0,   18, "AES128CBC:1" },
  // { 10,  0,   21,    0,   21, "AES192CBC:1" },
  { 10,  0,   24,    0,   24, "AES256CBC:1" },

  // { 10,  0,   18,    0,   18, "AES128CTR:1" },
  // { 10,  0,   21,    0,   21, "AES192CTR:1" },
  // { 10,  0,   24,    0,   24, "AES256CTR:1" },
  // {  2,  0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
  // {  2,  0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
  {  2,  0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },

  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },

  // {  1,  0, CMPLX(6), 0, -2, "AES128CBC:3" },
  // {  1,  0, CMPLX(7), 0, -2, "AES192CBC:3" },
  {  1,  0, CMPLX(8), 0, -2, "AES256CBC:3" }

  // {  1,  0, CMPLX(1), 0, -2, "AES128CTR:3" },
  // {  1,  0, CMPLX(1), 0, -2, "AES192CTR:3" },
  // {  1,  0, CMPLX(1), 0, -2, "AES256CTR:3" },
};
2747 
// Description of one hash test (item of g_Hash[]).
struct CBenchHash
{
  unsigned Weight;   // weight of the test (presumably for total results - the use is outside of this part of file)
  UInt32 Complex;    // complexity value of the hash method
  UInt32 CheckSum;   // presumably the expected folded checksum of digest - TODO confirm at call site
  const char *Name;  // hash method string
};
2755 
// ARM_CRC_MUL is the multiplier for complexity of "CRC32:32" and "CRC32:64" items of g_Hash[].
// #define ARM_CRC_MUL 100
#define ARM_CRC_MUL 1

// CrcBench() multiplies the number of commands by this value
// before the division by complexity of hash method.
#define k_Hash_Complex_Mult 256
2760 
// Table of hash tests.
// Fields of each item (see CBenchHash): { Weight, Complex, CheckSum, Name }
// The variants of one hash method ("SHA1:1" and "SHA1:2", for example) share the same CheckSum.
static const CBenchHash g_Hash[] =
{
  { 20,   256, 0x21e207bb, "CRC32:12" } ,
  {  2,   128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
  {  2,    64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
  { 10,   256, 0x41b901d1, "CRC64" },
  {  5,    64, 0x43eac94f, "XXH64" },
  {  2,  2340, 0x3398a904, "MD5" },
  { 10,  2340,                       0xff769021, "SHA1:1" },
  {  2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },
  { 10,  5100,                       0x7913ba03, "SHA256:1" },
  {  2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },
  {  5,  3200,                       0xe7aeb394, "SHA512:1" },
  {  2, CMPLX((40 * 4 + 1) * 4 + 4), 0xe7aeb394, "SHA512:2" },
  // { 10, 3428,       0x1cc99b18, "SHAKE128" },
  // { 10, 4235,       0x74eaddc3, "SHAKE256" },
  // { 10, 4000,       0xdf3e6863, "SHA3-224" },
  {  5, 4200,       0xcecac10d, "SHA3-256" },
  // { 10, 5538,       0x4e5d9163, "SHA3-384" },
  // { 10, 8000,       0x96a58289, "SHA3-512" },
  {  2,  4096, 0x85189d02, "BLAKE2sp:1" },
  {  2,  1024, 0x85189d02, "BLAKE2sp:2" }, // sse2-way4-fast
  {  2,   512, 0x85189d02, "BLAKE2sp:3" }  // avx2-way8-fast
#if 0
  , {  2,  2048, 0x85189d02, "BLAKE2sp:4" } // sse2-way1
  , {  2,  1024, 0x85189d02, "BLAKE2sp:5" } // sse2-way2
  , {  2,  1024, 0x85189d02, "BLAKE2sp:6" } // avx2-way2
  , {  2,  1024, 0x85189d02, "BLAKE2sp:7" } // avx2-way4
#endif
};
2791 
2792 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
2793 {
2794   char s[128];
2795   unsigned startPos = (unsigned)sizeof(s) - 32;
2796   memset(s, ' ', startPos);
2797   ConvertUInt64ToString(value, s + startPos);
2798   // if (withSpace)
2799   {
2800     startPos--;
2801     size++;
2802   }
2803   unsigned len = (unsigned)strlen(s + startPos);
2804   if (size > len)
2805   {
2806     size -= len;
2807     if (startPos < size)
2808       startPos = 0;
2809     else
2810       startPos -= size;
2811   }
2812   f.Print(s + startPos);
2813 }
2814 
// Widths (in characters) of columns in benchmark output.
// Note: PrintNumber() prints one additional space before each numeric field.
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;
static const unsigned kFieldSize_CrcSpeed = 8;


// total width of 4 fields (Speed, Usage, RU, Rating); presumably (4) is for the leading spaces of these fields
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
// total width of 2 fields (EU, Effec); presumably (2) is for the leading spaces of these fields
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
2828 
2829 
2830 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
2831 {
2832   PrintNumber(f, (rating + 500000) / 1000000, size);
2833 }
2834 
2835 
2836 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
2837 {
2838   UInt64 v = 0;
2839   if (divider != 0)
2840     v = (val * 100 + divider / 2) / divider;
2841   PrintNumber(f, v, size);
2842 }
2843 
2844 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
2845 {
2846   char s[256];
2847   memset(s, (Byte)c, size);
2848   s[size] = 0;
2849   f.Print(s);
2850 }
2851 
// Prints (size) space characters.
static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
{
  PrintChars(f, ' ', size);
}
2856 
// Prints (usage) value converted by Benchmark_GetUsage_Percents() in the field of (size) characters.
static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
{
  PrintNumber(f, Benchmark_GetUsage_Percents(usage), size);
}
2861 
2862 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
2863 {
2864   PrintUsage(f, usage, kFieldSize_Usage);
2865   PrintRating(f, rpu, kFieldSize_RU);
2866   PrintRating(f, rating, kFieldSize_Rating);
2867   if (showFreq)
2868   {
2869     if (cpuFreq == 0)
2870       PrintSpaces(f, kFieldSize_EUAndEffec);
2871     else
2872     {
2873       PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU);
2874       PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
2875     }
2876   }
2877 }
2878 
2879 
// Fills Speed, Usage and RPU from (info).
// Rating is input value here: it must be set before this call,
// because RPU is calculated from it.
void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info)
{
  Speed = info.GetUnpackSizeSpeed();
  Usage = info.GetUsage();
  RPU = info.GetRatingPerUsage(Rating);
}
2886 
2887 void CTotalBenchRes::Mult_For_Weight(unsigned weight)
2888 {
2889   NumIterations2 *= weight;
2890   RPU *= weight;
2891   Rating *= weight;
2892   Usage *= weight;
2893   Speed *= weight;
2894 }
2895 
2896 void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r)
2897 {
2898   Rating += r.Rating;
2899   Usage += r.Usage;
2900   RPU += r.RPU;
2901   Speed += r.Speed;
2902     // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
2903   NumIterations2 += r.NumIterations2;
2904 }
2905 
2906 static void PrintResults(IBenchPrintCallback *f,
2907     const CBenchInfo &info,
2908     unsigned weight,
2909     UInt64 rating,
2910     bool showFreq, UInt64 cpuFreq,
2911     CTotalBenchRes *res)
2912 {
2913   CTotalBenchRes t;
2914   t.Rating = rating;
2915   t.NumIterations2 = 1;
2916   t.Generate_From_BenchInfo(info);
2917 
2918   if (f)
2919   {
2920     if (t.Speed != 0)
2921       PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed);
2922     else
2923       PrintSpaces(*f, 1 + kFieldSize_Speed);
2924   }
2925   if (f)
2926   {
2927     PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq);
2928   }
2929 
2930   if (res)
2931   {
2932     // res->NumIterations1++;
2933     t.Mult_For_Weight(weight);
2934     res->Update_With_Res(t);
2935   }
2936 }
2937 
2938 static void PrintTotals(IBenchPrintCallback &f,
2939     bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
2940 {
2941   const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1;
2942   const UInt64 speed = res.Speed / numIterations2;
2943   if (showSpeed && speed != 0)
2944     PrintNumber(f, speed / 1024, kFieldSize_Speed);
2945   else
2946     PrintSpaces(f, 1 + kFieldSize_Speed);
2947 
2948   // PrintSpaces(f, 1 + kFieldSize_Speed);
2949   // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
2950   PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
2951 }
2952 
2953 
// Appends the hexadecimal representation of (v) to string (s).
static void PrintHex(AString &s, UInt64 v)
{
  char temp[32];
  ConvertUInt64ToHex(v, temp);
  s += temp;
}
2960 
2961 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
2962 {
2963   AString s;
2964   // s.Add_UInt32(ti.numProcessThreads);
2965   unsigned numSysThreads = ti.GetNumSystemThreads();
2966   if (ti.GetNumProcessThreads() != numSysThreads)
2967   {
2968     // if (ti.numProcessThreads != ti.numSysThreads)
2969     {
2970       s += " / ";
2971       s.Add_UInt32(numSysThreads);
2972     }
2973     s += " : ";
2974     #ifdef _WIN32
2975     PrintHex(s, ti.processAffinityMask);
2976     s += " / ";
2977     PrintHex(s, ti.systemAffinityMask);
2978     #else
2979     unsigned i = (numSysThreads + 3) & ~(unsigned)3;
2980     if (i == 0)
2981       i = 4;
2982     for (; i >= 4; )
2983     {
2984       i -= 4;
2985       unsigned val = 0;
2986       for (unsigned k = 0; k < 4; k++)
2987       {
2988         const unsigned bit = (ti.IsCpuSet(i + k) ? 1 : 0);
2989         val += (bit << k);
2990       }
2991       PrintHex(s, val);
2992     }
2993     #endif
2994   }
2995   return s;
2996 }
2997 
2998 
#ifdef Z7_LARGE_PAGES

#ifdef _WIN32
extern bool g_LargePagesMode;
extern "C"
{
  extern SIZE_T g_LargePageSize;
}
#endif

// Appends the information about large pages to (s).
// _WIN32: it adds "(LP-<size>[-1G][-NA])", if large pages mode is enabled
//         or if large page size is not zero:
//           "-1G" is added, if CPU_IsSupported_PageGB() returns true (x86 / x64 only);
//           "-NA" is added, if large pages mode is not enabled.
// non-WIN32: the string is not changed.
void Add_LargePages_String(AString &s)
{
  #ifdef _WIN32
  if (!g_LargePagesMode && g_LargePageSize == 0)
    return;

  s.Add_OptSpaced("(LP-");
  PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
  #ifdef MY_CPU_X86_OR_AMD64
  if (CPU_IsSupported_PageGB())
    s += "-1G";
  #endif
  if (!g_LargePagesMode)
    s += "-NA";
  s += ")";
  #else
    s += "";
  #endif
}

#endif
3030 
3031 
3032 
3033 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
3034     bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
3035 {
3036   f.Print("RAM ");
3037   f.Print(sizeString);
3038   if (size_Defined)
3039     PrintNumber(f, (size >> 20), 6);
3040   else
3041     f.Print("      ?");
3042   f.Print(" MB");
3043 
3044   #ifdef Z7_LARGE_PAGES
3045   {
3046     AString s;
3047     Add_LargePages_String(s);
3048     f.Print(s);
3049   }
3050   #endif
3051 
3052   f.Print(",  # ");
3053   f.Print(threadsString);
3054   PrintNumber(f, numThreads, 3);
3055 }
3056 
3057 
3058 
3059 struct CBenchCallbackToPrint Z7_final: public IBenchCallback
3060 {
3061   bool NeedPrint;
3062   bool Use2Columns;
3063   bool ShowFreq;
3064   unsigned NameFieldSize;
3065 
3066   unsigned EncodeWeight;
3067   unsigned DecodeWeight;
3068 
3069   UInt64 CpuFreq;
3070   UInt64 DictSize;
3071 
3072   IBenchPrintCallback *_file;
3073   CBenchProps BenchProps;
3074   CTotalBenchRes EncodeRes;
3075   CTotalBenchRes DecodeRes;
3076 
3077   CBenchInfo BenchInfo_Results[2];
3078 
3079   CBenchCallbackToPrint():
3080       NeedPrint(true),
3081       Use2Columns(false),
3082       ShowFreq(false),
3083       NameFieldSize(0),
3084       EncodeWeight(1),
3085       DecodeWeight(1),
3086       CpuFreq(0)
3087       {}
3088 
3089   void Init() { EncodeRes.Init(); DecodeRes.Init(); }
3090   void Print(const char *s);
3091   void NewLine();
3092 
3093   HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
3094   HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
3095   HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
3096 };
3097 
// Stores the frequency settings that are used for next printed results.
// It always returns S_OK.
HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
{
  ShowFreq = showFreq;
  CpuFreq = cpuFreq;
  return S_OK;
}
3104 
3105 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
3106 {
3107   RINOK(_file->CheckBreak())
3108   if (final)
3109     BenchInfo_Results[0] = info;
3110   if (final)
3111   if (NeedPrint)
3112   {
3113     const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
3114     PrintResults(_file, info,
3115         EncodeWeight, rating,
3116         ShowFreq, CpuFreq, &EncodeRes);
3117     if (!Use2Columns)
3118       _file->NewLine();
3119   }
3120   return S_OK;
3121 }
3122 
3123 static const char * const kSep = "  | ";
3124 
3125 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
3126 {
3127   RINOK(_file->CheckBreak())
3128   if (final)
3129     BenchInfo_Results[1] = info;
3130   if (final)
3131   if (NeedPrint)
3132   {
3133     const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
3134     if (Use2Columns)
3135       _file->Print(kSep);
3136     else
3137       PrintSpaces(*_file, NameFieldSize);
3138     CBenchInfo info2 = info;
3139     info2.UnpackSize *= info2.NumIterations;
3140     info2.PackSize *= info2.NumIterations;
3141     info2.NumIterations = 1;
3142     PrintResults(_file, info2,
3143         DecodeWeight, rating,
3144         ShowFreq, CpuFreq, &DecodeRes);
3145   }
3146   return S_OK;
3147 }
3148 
// Prints the string (s) to (_file).
void CBenchCallbackToPrint::Print(const char *s)
{
  _file->Print(s);
}
3153 
// Prints new line to (_file).
void CBenchCallbackToPrint::NewLine()
{
  _file->NewLine();
}
3158 
3159 static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
3160 {
3161   f.Print(s);
3162   int numSpaces = (int)size - (int)MyStringLen(s);
3163   if (numSpaces > 0)
3164     PrintSpaces(f, (unsigned)numSpaces);
3165 }
3166 
3167 static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
3168 {
3169   int numSpaces = (int)size - (int)MyStringLen(s);
3170   if (numSpaces > 0)
3171     PrintSpaces(f, (unsigned)numSpaces);
3172   f.Print(s);
3173 }
3174 
3175 
3176 static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name)
3177 {
3178   UString wildc = GetUnicodeString(mask);
3179   UString bname = GetUnicodeString(name);
3180   wildc.MakeLower_Ascii();
3181   bname.MakeLower_Ascii();
3182   return DoesWildcardMatchName(wildc, bname);
3183 }
3184 
3185 
3186 static HRESULT TotalBench(
3187     DECL_EXTERNAL_CODECS_LOC_VARS
3188     const COneMethodInfo &methodMask,
3189     UInt64 complexInCommands,
3190   #ifndef Z7_ST
3191     UInt32 numThreads,
3192     const CAffinityMode *affinityMode,
3193   #endif
3194     bool forceUnpackSize,
3195     size_t unpackSize,
3196     const Byte *fileData,
3197     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
3198 {
3199   for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
3200   {
3201     const CBenchMethod &bench = g_Bench[i];
3202     if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3203       continue;
3204     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3205     {
3206       unsigned keySize = 32;
3207            if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16;
3208       else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24;
3209       callback->BenchProps.KeySize = keySize;
3210     }
3211     callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3212     callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3213     callback->BenchProps.EncComplex = bench.EncComplex;
3214 
3215     COneMethodInfo method;
3216     NCOM::CPropVariant propVariant;
3217     propVariant = bench.Name;
3218     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3219 
3220     size_t unpackSize2 = unpackSize;
3221     if (!forceUnpackSize && bench.DictBits == 0)
3222       unpackSize2 = kFilterUnpackSize;
3223 
3224     callback->EncodeWeight = bench.Weight;
3225     callback->DecodeWeight = bench.Weight;
3226 
3227     const HRESULT res = MethodBench(
3228         EXTERNAL_CODECS_LOC_VARS
3229         complexInCommands,
3230         #ifndef Z7_ST
3231         false, numThreads, affinityMode,
3232         #endif
3233         method,
3234         unpackSize2, fileData,
3235         bench.DictBits,
3236         printCallback, callback, &callback->BenchProps);
3237 
3238     if (res == E_NOTIMPL)
3239     {
3240       // callback->Print(" ---");
3241       // we need additional empty line as line for decompression results
3242       if (!callback->Use2Columns)
3243         callback->NewLine();
3244     }
3245     else
3246     {
3247       RINOK(res)
3248     }
3249 
3250     callback->NewLine();
3251   }
3252   return S_OK;
3253 }
3254 
3255 
3256 struct CFreqBench
3257 {
3258   // in:
3259   UInt64 complexInCommands;
3260   UInt32 numThreads;
3261   bool showFreq;
3262   UInt64 specifiedFreq;
3263 
3264   // out:
3265   UInt64 CpuFreqRes;
3266   UInt64 UsageRes;
3267   UInt32 res;
3268 
3269   CFreqBench()
3270     {}
3271 
3272   HRESULT FreqBench(IBenchPrintCallback *_file
3273       #ifndef Z7_ST
3274       , const CAffinityMode *affinityMode
3275       #endif
3276       );
3277 };
3278 
3279 
// Measures the speed of CountCpuFreq() code in (numThreads) threads.
//   _file        : optional callback; it is used for break checks and for printing of results.
//   affinityMode : (multithreaded build only) it is used for thread creation;
//                  it is dereferenced without check, so it must not be NULL.
// Results:
//   CpuFreqRes : speed (commands per second) divided by the number of threads.
//   UsageRes   : the value of CBenchInfo::GetUsage() for the measured interval.
//   res        : the computed sum (single-thread branch only).
// Returns the error of thread creation / waiting, or the error of break callback.
HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
    #ifndef Z7_ST
    , const CAffinityMode *affinityMode
    #endif
    )
{
  res = 0;
  CpuFreqRes = 0;
  UsageRes = 0;

  if (numThreads == 0)
    numThreads = 1;

  #ifdef Z7_ST
  numThreads = 1;
  #endif

  // The total number of loop iterations is split to two factors:
  //   numIterations  : number of CountCpuFreq() calls (break can be checked between the calls),
  //   numIterations2 : number of iterations inside one CountCpuFreq() call (it is 2^30 at most).
  const UInt32 complexity = kNumFreqCommands;
  UInt64 numIterations = complexInCommands / complexity;
  UInt32 numIterations2 = 1 << 30;
  if (numIterations > numIterations2)
    numIterations /= numIterations2;
  else
  {
    numIterations2 = (UInt32)numIterations;
    numIterations = 1;
  }

  CBenchInfoCalc progressInfoSpec;

  #ifndef Z7_ST

  // the threads are used also for one thread, if the affinity must be set
  bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity();

  if (mtMode)
  {
    CFreqThreads threads;
    threads.Items = new CFreqInfo[numThreads];
    UInt32 i;
    for (i = 0; i < numThreads; i++)
    {
      CFreqInfo &info = threads.Items[i];
      info.Callback = _file;
      info.CallbackRes = S_OK;
      info.NumIterations = numIterations;
      info.Size = numIterations2;
    }
    progressInfoSpec.SetStartTime();
    for (i = 0; i < numThreads; i++)
    {
      // Sleep(10);
      CFreqInfo &info = threads.Items[i];
      WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
      // NumThreads counts the created threads, so the destructor of (threads)
      // waits for them, if we return with error here
      if (info.Thread.IsCreated())
        threads.NumThreads++;
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
    }
    WRes wres = threads.WaitAll();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    for (i = 0; i < numThreads; i++)
    {
      RINOK(threads.Items[i].CallbackRes)
    }
    // note: the values (ValRes) of threads are not added to (res) in this branch
  }
  else
  #endif
  {
    progressInfoSpec.SetStartTime();
    UInt32 sum = g_BenchCpuFreqTemp;
    UInt64 k = numIterations;
    // (numIterations != 0) here, so the loop with (--k) condition is finite
    do
    {
      sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
      if (_file)
      {
        RINOK(_file->CheckBreak())
      }
    }
    while (--k);
    res += sum;
  }

  // presumably this check only creates a use of (res), so the compiler
  // can't remove the computation - TODO confirm
  if (res == 0x12345678)
  if (_file)
  {
    RINOK(_file->CheckBreak())
  }

  CBenchInfo info;
  progressInfoSpec.SetFinishTime(info);

  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = 1;

  // total number of executed commands in all threads
  const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
  const UInt64 rating = info.GetSpeed(numCommands);
  CpuFreqRes = rating / numThreads;
  UsageRes = info.GetUsage();

  if (_file)
  {
    PrintResults(_file, info,
          0, // weight
          rating,
          showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
    RINOK(_file->CheckBreak())
  }

  return S_OK;
}
3393 
3394 
3395 
3396 static HRESULT CrcBench(
3397     DECL_EXTERNAL_CODECS_LOC_VARS
3398     UInt64 complexInCommands,
3399     UInt32 numThreads,
3400     const size_t bufferSize,
3401     const Byte *fileData,
3402 
3403     UInt64 &speed,
3404     UInt64 &usage,
3405 
3406     UInt32 complexity, unsigned benchWeight,
3407     const UInt32 *checkSum,
3408     const COneMethodInfo &method,
3409     IBenchPrintCallback *_file,
3410     #ifndef Z7_ST
3411     const CAffinityMode *affinityMode,
3412     #endif
3413     bool showRating,
3414     CTotalBenchRes *encodeRes,
3415     bool showFreq, UInt64 cpuFreq)
3416 {
3417   if (numThreads == 0)
3418     numThreads = 1;
3419 
3420   #ifdef Z7_ST
3421   numThreads = 1;
3422   #endif
3423 
3424   const AString &methodName = method.MethodName;
3425   // methodName.RemoveChar(L'-');
3426   CMethodId hashID;
3427   if (!FindHashMethod(
3428       EXTERNAL_CODECS_LOC_VARS
3429       methodName, hashID))
3430     return E_NOTIMPL;
3431 
3432   /*
3433   // if will generate random data in each thread, instead of global data
3434   CMidAlignedBuffer buffer;
3435   if (!fileData)
3436   {
3437     ALLOC_WITH_HRESULT(&buffer, bufferSize)
3438     RandGen(buffer, bufferSize);
3439     fileData = buffer;
3440   }
3441   */
3442 
3443   const size_t bsize = (bufferSize == 0 ? 1 : bufferSize);
3444   UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
3445   if (numIterations == 0)
3446     numIterations = 1;
3447 
3448   CBenchInfoCalc progressInfoSpec;
3449   CBenchInfo info;
3450 
3451   #ifndef Z7_ST
3452   bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();
3453 
3454   if (mtEncMode)
3455   {
3456     CCrcThreads threads;
3457     threads.Items = new CCrcInfo[numThreads];
3458     {
3459       WRes wres = threads.Common.StartEvent.Create();
3460       if (wres != 0)
3461         return HRESULT_FROM_WIN32(wres);
3462       threads.NeedClose = true;
3463     }
3464 
3465     UInt32 i;
3466     for (i = 0; i < numThreads; i++)
3467     {
3468       CCrcInfo &ci = threads.Items[i];
3469       AString name;
3470       RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
3471       if (!ci.Hasher)
3472         return E_NOTIMPL;
3473       CMyComPtr<ICompressSetCoderProperties> scp;
3474       ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3475       if (scp)
3476       {
3477         RINOK(method.SetCoderProps(scp))
3478       }
3479 
3480       ci.Callback = _file;
3481       ci.Data = fileData;
3482       ci.NumIterations = numIterations;
3483       ci.Size = bufferSize;
3484       ci.CheckSumDefined = false;
3485       if (checkSum)
3486       {
3487         ci.CheckSum = *checkSum;
3488         ci.CheckSumDefined = true;
3489       }
3490 
3491       #ifdef USE_ALLOCA
3492       ci.AllocaSize = BENCH_ALLOCA_VALUE(i);
3493       #endif
3494     }
3495 
3496     for (i = 0; i < numThreads; i++)
3497     {
3498       CCrcInfo &ci = threads.Items[i];
3499       ci.ThreadIndex = i;
3500       ci.Common = &threads.Common;
3501       ci.AffinityMode = *affinityMode;
3502       HRESULT hres = ci.CreateThread();
3503       if (ci.Thread.IsCreated())
3504         threads.NumThreads++;
3505       if (hres != 0)
3506         return hres;
3507     }
3508 
3509     for (i = 0; i < numThreads; i++)
3510     {
3511       CCrcInfo &ci = threads.Items[i];
3512       WRes wres = ci.ReadyEvent.Lock();
3513       if (wres != 0)
3514         return HRESULT_FROM_WIN32(wres);
3515       RINOK(ci.Res)
3516     }
3517 
3518     progressInfoSpec.SetStartTime();
3519 
3520     WRes wres = threads.StartAndWait();
3521     if (wres != 0)
3522       return HRESULT_FROM_WIN32(wres);
3523 
3524     progressInfoSpec.SetFinishTime(info);
3525 
3526     for (i = 0; i < numThreads; i++)
3527     {
3528       RINOK(threads.Items[i].Res)
3529       if (i != 0)
3530         if (threads.Items[i].CheckSum_Res !=
3531             threads.Items[i - 1].CheckSum_Res)
3532           return S_FALSE;
3533     }
3534   }
3535   else
3536   #endif
3537   {
3538     CMyComPtr<IHasher> hasher;
3539     AString name;
3540     RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
3541     if (!hasher)
3542       return E_NOTIMPL;
3543     CMyComPtr<ICompressSetCoderProperties> scp;
3544     hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3545     if (scp)
3546     {
3547       RINOK(method.SetCoderProps(scp))
3548     }
3549     CCrcInfo_Base crcib;
3550     crcib.CreateLocalBuf = false;
3551     RINOK(crcib.Generate(fileData, bufferSize))
3552     progressInfoSpec.SetStartTime();
3553     RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
3554     progressInfoSpec.SetFinishTime(info);
3555   }
3556 
3557 
3558   UInt64 unpSize = numIterations * bufferSize;
3559   UInt64 unpSizeThreads = unpSize * numThreads;
3560   info.UnpackSize = unpSizeThreads;
3561   info.PackSize = unpSizeThreads;
3562   info.NumIterations = 1;
3563 
3564   if (_file)
3565   {
3566     if (showRating)
3567     {
3568       UInt64 unpSizeThreads2 = unpSizeThreads;
3569       if (unpSizeThreads2 == 0)
3570         unpSizeThreads2 = numIterations * 1 * numThreads;
3571       const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
3572       const UInt64 rating = info.GetSpeed(numCommands);
3573       PrintResults(_file, info,
3574           benchWeight, rating,
3575           showFreq, cpuFreq, encodeRes);
3576     }
3577     RINOK(_file->CheckBreak())
3578   }
3579 
3580   speed = info.GetSpeed(unpSizeThreads);
3581   usage = info.GetUsage();
3582 
3583   return S_OK;
3584 }
3585 
3586 
3587 
3588 static HRESULT TotalBench_Hash(
3589     DECL_EXTERNAL_CODECS_LOC_VARS
3590     const COneMethodInfo &methodMask,
3591     UInt64 complexInCommands,
3592     UInt32 numThreads,
3593     size_t bufSize,
3594     const Byte *fileData,
3595     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
3596     #ifndef Z7_ST
3597     const CAffinityMode *affinityMode,
3598     #endif
3599     CTotalBenchRes *encodeRes,
3600     bool showFreq, UInt64 cpuFreq)
3601 {
3602   for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
3603   {
3604     const CBenchHash &bench = g_Hash[i];
3605     if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3606       continue;
3607     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3608     // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3609     // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3610     // callback->BenchProps.EncComplex = bench.EncComplex;
3611 
3612     COneMethodInfo method;
3613     NCOM::CPropVariant propVariant;
3614     propVariant = bench.Name;
3615     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3616 
3617     UInt64 speed, usage;
3618 
3619     const HRESULT res = CrcBench(
3620         EXTERNAL_CODECS_LOC_VARS
3621         complexInCommands,
3622         numThreads, bufSize, fileData,
3623         speed, usage,
3624         bench.Complex, bench.Weight,
3625         (!fileData && bufSize == (1 << kNumHashDictBits)) ? &bench.CheckSum : NULL,
3626         method,
3627         printCallback,
3628      #ifndef Z7_ST
3629         affinityMode,
3630      #endif
3631         true, // showRating
3632         encodeRes, showFreq, cpuFreq);
3633     if (res == E_NOTIMPL)
3634     {
3635       // callback->Print(" ---");
3636     }
3637     else
3638     {
3639       RINOK(res)
3640     }
3641     callback->NewLine();
3642   }
3643   return S_OK;
3644 }
3645 
/*
  CTempValues: owner of array of UInt64 values that is allocated with new[].
    Values  - pointer to array (NULL before first Alloc() call).
    Alloc() - frees previous array (if any) and allocates new array of (num) items.
              The items of new array are not initialized.
  The array is freed in destructor.
  Copying is disabled, because a copy would delete same array twice.
*/
struct CTempValues
{
  UInt64 *Values;
  CTempValues(): Values(NULL) {}
  void Alloc(UInt32 num)
  {
    // we free old array here, so repeated Alloc() call doesn't leak memory.
    // (Values) is set to NULL before new[], so the destructor can't delete
    // old array second time, if new[] throws exception.
    delete []Values;
    Values = NULL;
    Values = new UInt64[num];
  }
  ~CTempValues() { delete []Values; }
private:
  // declared without implementation: copying is not allowed
  CTempValues(const CTempValues &);
  CTempValues &operator=(const CTempValues &);
};
3653 
3654 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
3655 {
3656   const wchar_t *end;
3657   UInt64 result = ConvertStringToUInt64(s, &end);
3658   if (*end != 0 || s.IsEmpty())
3659     prop = s;
3660   else if (result <= (UInt32)0xFFFFFFFF)
3661     prop = (UInt32)result;
3662   else
3663     prop = result;
3664 }
3665 
3666 
3667 static bool AreSameMethodNames(const char *fullName, const char *shortName)
3668 {
3669   return StringsAreEqualNoCase_Ascii(fullName, shortName);
3670 }
3671 
3672 
3673 
3674 
3675 static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads)
3676 {
3677   PrintRequirements(f, "usage:", true, usage, "Benchmark threads:   ", threads);
3678 }
3679 
3680 
3681 static void Print_Delimiter(IBenchPrintCallback &f)
3682 {
3683   f.Print(" |");
3684 }
3685 
3686 static void Print_Pow(IBenchPrintCallback &f, unsigned pow)
3687 {
3688   char s[16];
3689   ConvertUInt32ToString(pow, s);
3690   unsigned pos = MyStringLen(s);
3691   s[pos++] = ':';
3692   s[pos] = 0;
3693   PrintLeft(f, s, kFieldSize_SmallName); // 4
3694 }
3695 
3696 static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f,
3697     UInt64 usage, UInt64 speed)
3698 {
3699   PrintUsage(f, usage, kFieldSize_Usage);
3700   PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed);
3701 }
3702 
3703 
3704 HRESULT Bench(
3705     DECL_EXTERNAL_CODECS_LOC_VARS
3706     IBenchPrintCallback *printCallback,
3707     IBenchCallback *benchCallback,
3708     const CObjectVector<CProperty> &props,
3709     UInt32 numIterations,
3710     bool multiDict,
3711     IBenchFreqCallback *freqCallback)
3712 {
3713   // for (int y = 0; y < 10000; y++)
3714   if (!CrcInternalTest())
3715     return E_FAIL;
3716 
3717   UInt32 numCPUs = 1;
3718   size_t ramSize = (size_t)sizeof(size_t) << 29;
3719 
3720   NSystem::CProcessAffinity threadsInfo;
3721   threadsInfo.InitST();
3722 
3723   #ifndef Z7_ST
3724 
3725   if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0)
3726     numCPUs = threadsInfo.GetNumProcessThreads();
3727   else
3728     numCPUs = NSystem::GetNumberOfProcessors();
3729 
3730   #endif
3731 
3732   // numCPUs = 24;
3733   /*
3734   {
3735     DWORD_PTR mask = (1 << 0);
3736     DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask);
3737     old = old;
3738     DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask);
3739     old2 = old2;
3740     return 0;
3741   }
3742   */
3743 
3744   const bool ramSize_Defined = NSystem::GetRamSize(ramSize);
3745 
3746   UInt32 numThreadsSpecified = numCPUs;
3747   bool needSetComplexity = false;
3748   UInt32 testTimeMs = kComplexInMs;
3749   UInt32 startDicLog = 22;
3750   bool startDicLog_Defined = false;
3751   UInt64 specifiedFreq = 0;
3752   bool multiThreadTests = false;
3753   UInt64 complexInCommands = kComplexInCommands;
3754   UInt32 numThreads_Start = 1;
3755 
3756   #ifndef Z7_ST
3757   CAffinityMode affinityMode;
3758   #endif
3759 
3760 
3761   COneMethodInfo method;
3762 
3763   CMidAlignedBuffer fileDataBuffer;
3764   bool use_fileData = false;
3765   bool isFixedDict = false;
3766 
3767   {
3768   unsigned i;
3769 
3770   if (printCallback)
3771   {
3772     for (i = 0; i < props.Size(); i++)
3773     {
3774       const CProperty &property = props[i];
3775       printCallback->Print(" ");
3776       printCallback->Print(GetAnsiString(property.Name));
3777       if (!property.Value.IsEmpty())
3778       {
3779         printCallback->Print("=");
3780         printCallback->Print(GetAnsiString(property.Value));
3781       }
3782     }
3783     if (!props.IsEmpty())
3784       printCallback->NewLine();
3785   }
3786 
3787 
3788   for (i = 0; i < props.Size(); i++)
3789   {
3790     const CProperty &property = props[i];
3791     UString name (property.Name);
3792     name.MakeLower_Ascii();
3793 
3794     if (name.IsEqualTo("file"))
3795     {
3796       if (property.Value.IsEmpty())
3797         return E_INVALIDARG;
3798 
3799       NFile::NIO::CInFile file;
3800       if (!file.Open(us2fs(property.Value)))
3801         return GetLastError_noZero_HRESULT();
3802       size_t len;
3803       {
3804         UInt64 len64;
3805         if (!file.GetLength(len64))
3806           return GetLastError_noZero_HRESULT();
3807         if (printCallback)
3808         {
3809           printCallback->Print("file size =");
3810           PrintNumber(*printCallback, len64, 0);
3811           printCallback->NewLine();
3812         }
3813         len = (size_t)len64;
3814         if (len != len64)
3815           return E_INVALIDARG;
3816       }
3817 
3818       // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
3819 
3820       ALLOC_WITH_HRESULT(&fileDataBuffer, len)
3821       use_fileData = true;
3822 
3823       {
3824         size_t processed;
3825         if (!file.ReadFull((Byte *)fileDataBuffer, len, processed))
3826           return GetLastError_noZero_HRESULT();
3827         if (processed != len)
3828           return E_FAIL;
3829       }
3830       continue;
3831     }
3832 
3833     NCOM::CPropVariant propVariant;
3834     if (!property.Value.IsEmpty())
3835       ParseNumberString(property.Value, propVariant);
3836 
3837     if (name.IsEqualTo("time"))
3838     {
3839       RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3840       needSetComplexity = true;
3841       testTimeMs *= 1000;
3842       continue;
3843     }
3844 
3845     if (name.IsEqualTo("timems"))
3846     {
3847       RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3848       needSetComplexity = true;
3849       continue;
3850     }
3851 
3852     if (name.IsEqualTo("tic"))
3853     {
3854       UInt32 v;
3855       RINOK(ParsePropToUInt32(UString(), propVariant, v))
3856       if (v >= 64)
3857         return E_INVALIDARG;
3858       complexInCommands = (UInt64)1 << v;
3859       continue;
3860     }
3861 
3862     const bool isCurrent_fixedDict = name.IsEqualTo("df");
3863     if (isCurrent_fixedDict)
3864       isFixedDict = true;
3865     if (isCurrent_fixedDict || name.IsEqualTo("ds"))
3866     {
3867       RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog))
3868       if (startDicLog > 32)
3869         return E_INVALIDARG;
3870       startDicLog_Defined = true;
3871       continue;
3872     }
3873 
3874     if (name.IsEqualTo("mts"))
3875     {
3876       RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start))
3877       continue;
3878     }
3879 
3880     if (name.IsEqualTo("af"))
3881     {
3882       UInt32 bundle;
3883       RINOK(ParsePropToUInt32(UString(), propVariant, bundle))
3884       if (bundle > 0 && bundle < numCPUs)
3885       {
3886         #ifndef Z7_ST
3887         affinityMode.SetLevels(numCPUs, 2);
3888         affinityMode.NumBundleThreads = bundle;
3889         #endif
3890       }
3891       continue;
3892     }
3893 
3894     if (name.IsEqualTo("freq"))
3895     {
3896       UInt32 freq32 = 0;
3897       RINOK(ParsePropToUInt32(UString(), propVariant, freq32))
3898       if (freq32 == 0)
3899         return E_INVALIDARG;
3900       specifiedFreq = (UInt64)freq32 * 1000000;
3901 
3902       if (printCallback)
3903       {
3904         printCallback->Print("freq=");
3905         PrintNumber(*printCallback, freq32, 0);
3906         printCallback->NewLine();
3907       }
3908 
3909       continue;
3910     }
3911 
3912     if (name.IsPrefixedBy_Ascii_NoCase("mt"))
3913     {
3914       const UString s = name.Ptr(2);
3915       if (s.IsEqualTo("*")
3916           || (s.IsEmpty()
3917             && propVariant.vt == VT_BSTR
3918             && StringsAreEqual_Ascii(propVariant.bstrVal, "*")))
3919       {
3920         multiThreadTests = true;
3921         continue;
3922       }
3923       #ifndef Z7_ST
3924       RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified))
3925       #endif
3926       continue;
3927     }
3928 
3929     RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant))
3930   }
3931   }
3932 
3933   if (printCallback)
3934   {
3935     AString s;
3936 
3937 #if 1 || !defined(Z7_MSC_VER_ORIGINAL) || (Z7_MSC_VER_ORIGINAL >= 1900)
3938     s += "Compiler: ";
3939     GetCompiler(s);
3940     printCallback->Print(s);
3941     printCallback->NewLine();
3942     s.Empty();
3943 #endif
3944 
3945     GetSystemInfoText(s);
3946     printCallback->Print(s);
3947     printCallback->NewLine();
3948   }
3949 
3950   if (printCallback)
3951   {
3952     printCallback->Print("1T CPU Freq (MHz):");
3953   }
3954 
3955   if (printCallback || freqCallback)
3956   {
3957     UInt64 numMilCommands = 1 << 6;
3958     if (specifiedFreq != 0)
3959     {
3960       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3961         numMilCommands >>= 1;
3962     }
3963 
3964     for (int jj = 0;; jj++)
3965     {
3966       if (printCallback)
3967         RINOK(printCallback->CheckBreak())
3968 
3969       UInt64 start = ::GetTimeCount();
3970       UInt32 sum = (UInt32)start;
3971       sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3972       if (sum == 0xF1541213)
3973         if (printCallback)
3974           printCallback->Print("");
3975       const UInt64 realDelta = ::GetTimeCount() - start;
3976       start = realDelta;
3977       if (start == 0)
3978         start = 1;
3979       if (start > (UInt64)1 << 61)
3980         start = 1;
3981       const UInt64 freq = GetFreq();
3982       // mips is constant in some compilers
3983       const UInt64 hzVal = MyMultDiv64(numMilCommands * 1000000, freq, start);
3984       const UInt64 mipsVal = numMilCommands * freq / start;
3985       if (printCallback)
3986       {
3987         if (realDelta == 0)
3988         {
3989           printCallback->Print(" -");
3990         }
3991         else
3992         {
3993           // PrintNumber(*printCallback, start, 0);
3994           PrintNumber(*printCallback, mipsVal, 5);
3995         }
3996       }
3997       if (freqCallback)
3998       {
3999         RINOK(freqCallback->AddCpuFreq(1, hzVal, kBenchmarkUsageMult))
4000       }
4001 
4002       if (jj >= 1)
4003       {
4004         bool needStop = (numMilCommands >= (1 <<
4005           #ifdef _DEBUG
4006             7
4007           #else
4008             11
4009           #endif
4010           ));
4011         if (start >= freq * 16)
4012         {
4013           printCallback->Print(" (Cmplx)");
4014           if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
4015           {
4016             needSetComplexity = true;
4017           }
4018           needStop = true;
4019         }
4020         if (needSetComplexity)
4021           SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands);
4022         if (needStop)
4023           break;
4024         numMilCommands <<= 1;
4025       }
4026     }
4027     if (freqCallback)
4028     {
4029       RINOK(freqCallback->FreqsFinished(1))
4030     }
4031   }
4032 
4033   if (printCallback || freqCallback)
4034   for (unsigned test = 0; test < 3; test++)
4035   {
4036     if (numThreadsSpecified < 2)
4037     {
4038       // if (test == 1)
4039       break;
4040     }
4041     if (test == 2 && numThreadsSpecified <= numCPUs)
4042       break;
4043     if (printCallback)
4044       printCallback->NewLine();
4045 
4046     /* it can show incorrect frequency for HT threads. */
4047 
4048     UInt32 numThreads = numThreadsSpecified;
4049     if (test < 2)
4050     {
4051       if (numThreads >= numCPUs)
4052         numThreads = numCPUs;
4053       if (test == 0)
4054         numThreads /= 2;
4055     }
4056     if (numThreads < 1)
4057       numThreads = 1;
4058 
4059     if (printCallback)
4060     {
4061       char s[128];
4062       ConvertUInt64ToString(numThreads, s);
4063       printCallback->Print(s);
4064       printCallback->Print("T CPU Freq (MHz):");
4065     }
4066     UInt64 numMilCommands = 1 <<
4067           #ifdef _DEBUG
4068             7;
4069           #else
4070             10;
4071           #endif
4072 
4073     if (specifiedFreq != 0)
4074     {
4075       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
4076         numMilCommands >>= 1;
4077     }
4078 
4079     // for (int jj = 0;; jj++)
4080     for (;;)
4081     {
4082       if (printCallback)
4083         RINOK(printCallback->CheckBreak())
4084 
4085       {
4086         // PrintLeft(f, "CPU", kFieldSize_Name);
4087 
4088         // UInt32 resVal;
4089 
4090         CFreqBench fb;
4091         fb.complexInCommands = numMilCommands * 1000000;
4092         fb.numThreads = numThreads;
4093         // showFreq;
4094         // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4095         fb.showFreq = true;
4096         fb.specifiedFreq = 1;
4097 
4098         const HRESULT res = fb.FreqBench(NULL /* printCallback */
4099             #ifndef Z7_ST
4100               , &affinityMode
4101             #endif
4102             );
4103         RINOK(res)
4104 
4105         if (freqCallback)
4106         {
4107           RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes))
4108         }
4109 
4110         if (printCallback)
4111         {
4112           /*
4113           if (realDelta == 0)
4114           {
4115             printCallback->Print(" -");
4116           }
4117           else
4118           */
4119           {
4120             // PrintNumber(*printCallback, start, 0);
4121             PrintUsage(*printCallback, fb.UsageRes, 3);
4122             printCallback->Print("%");
4123             PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
4124             printCallback->Print("  ");
4125 
4126             // PrintNumber(*printCallback, fb.UsageRes, 5);
4127           }
4128         }
4129       }
4130       // if (jj >= 1)
4131       {
4132         const bool needStop = (numMilCommands >= (1 <<
4133           #ifdef _DEBUG
4134             7
4135           #else
4136             11
4137           #endif
4138           ));
4139         if (needStop)
4140           break;
4141         numMilCommands <<= 1;
4142       }
4143     }
4144     if (freqCallback)
4145     {
4146       RINOK(freqCallback->FreqsFinished(numThreads))
4147     }
4148   }
4149 
4150 
4151   if (printCallback)
4152   {
4153     printCallback->NewLine();
4154     printCallback->NewLine();
4155     PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
4156     printCallback->Print(GetProcessThreadsInfo(threadsInfo));
4157     printCallback->NewLine();
4158   }
4159 
4160   if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
4161     return E_INVALIDARG;
4162 
4163   UInt64 dict = (UInt64)1 << startDicLog;
4164   const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
4165 
4166   const unsigned level = method.GetLevel();
4167 
4168   AString &methodName = method.MethodName;
4169   const AString original_MethodName = methodName;
4170   if (methodName.IsEmpty())
4171     methodName = "LZMA";
4172 
4173   if (benchCallback)
4174   {
4175     CBenchProps benchProps;
4176     benchProps.SetLzmaCompexity();
4177     const UInt64 dictSize = method.Get_Lzma_DicSize();
4178 
4179     size_t uncompressedDataSize;
4180     if (use_fileData)
4181     {
4182       uncompressedDataSize = fileDataBuffer.Size();
4183     }
4184     else
4185     {
4186       uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
4187       if (uncompressedDataSize < dictSize)
4188         return E_INVALIDARG;
4189     }
4190 
4191     return MethodBench(
4192         EXTERNAL_CODECS_LOC_VARS
4193         complexInCommands,
4194       #ifndef Z7_ST
4195         true, numThreadsSpecified,
4196         &affinityMode,
4197       #endif
4198         method,
4199         uncompressedDataSize, (const Byte *)fileDataBuffer,
4200         kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
4201   }
4202 
4203   if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
4204     methodName = "crc32";
4205 
4206   CMethodId hashID;
4207   const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID);
4208   int codecIndex = -1;
4209   bool isFilter = false;
4210   if (!isHashMethod)
4211   {
4212     UInt32 numStreams;
4213     codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName,
4214         true,  // encode
4215         hashID, numStreams, isFilter);
4216     // we can allow non filter for BW tests
4217     if (!isFilter) codecIndex = -1;
4218   }
4219 
4220   CBenchCallbackToPrint callback;
4221   callback.Init();
4222   callback._file = printCallback;
4223 
4224   if (isHashMethod || codecIndex != -1)
4225   {
4226     if (!printCallback)
4227       return S_FALSE;
4228     IBenchPrintCallback &f = *printCallback;
4229 
4230     UInt64 dict64 = dict;
4231     if (!dictIsDefined)
4232       dict64 = (1 << 27);
4233     if (use_fileData)
4234     {
4235       if (!dictIsDefined)
4236         dict64 = fileDataBuffer.Size();
4237       else if (dict64 > fileDataBuffer.Size())
4238         dict64 = fileDataBuffer.Size();
4239     }
4240 
4241     for (;;)
4242     {
4243       const int index = method.FindProp(NCoderPropID::kDictionarySize);
4244       if (index < 0)
4245         break;
4246       method.Props.Delete((unsigned)index);
4247     }
4248 
4249     // methodName.RemoveChar(L'-');
4250     Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method
4251     const UInt32 *checkSum = NULL;
4252     int benchIndex = -1;
4253 
4254     if (isHashMethod)
4255     {
4256       for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
4257       {
4258         const CBenchHash &h = g_Hash[i];
4259         AString benchMethod (h.Name);
4260         AString benchProps;
4261         const int propPos = benchMethod.Find(':');
4262         if (propPos >= 0)
4263         {
4264           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4265           benchMethod.DeleteFrom((unsigned)propPos);
4266         }
4267 
4268         if (AreSameMethodNames(benchMethod, methodName))
4269         {
4270           const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4271           /*
4272           bool isMainMethod = method.PropsString.IsEmpty();
4273           if (isMainMethod)
4274             isMainMethod = !checkSum
4275                 || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8"));
4276           if (sameProps || isMainMethod)
4277           */
4278           {
4279             complexity = (Int32)h.Complex;
4280             checkSum = &h.CheckSum;
4281             if (sameProps)
4282               break;
4283             /*
4284             if property. is not specified, we use the complexity
4285             for latest fastest method (crc32:64)
4286             */
4287           }
4288         }
4289       }
4290       // if (!checkSum) return E_NOTIMPL;
4291     }
4292     else
4293     {
4294       for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4295       {
4296         const CBenchMethod &bench = g_Bench[i];
4297         AString benchMethod (bench.Name);
4298         AString benchProps;
4299         const int propPos = benchMethod.Find(':');
4300         if (propPos >= 0)
4301         {
4302           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4303           benchMethod.DeleteFrom((unsigned)propPos);
4304         }
4305 
4306         if (AreSameMethodNames(benchMethod, methodName))
4307         {
4308           const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4309           // bool isMainMethod = method.PropsString.IsEmpty();
4310           // if (sameProps || isMainMethod)
4311           {
4312             benchIndex = (int)i;
4313             if (sameProps)
4314               break;
4315           }
4316         }
4317       }
4318       // if (benchIndex < 0) return E_NOTIMPL;
4319     }
4320 
4321     {
4322       /* we count usage only for crc and filter. non-filters are not supported */
4323       UInt64 usage = (1 << 20);
4324       UInt64 bufSize = dict64;
4325       UInt32 numBlocks = isHashMethod ? 1 : 3;
4326       if (use_fileData)
4327       {
4328         usage += fileDataBuffer.Size();
4329         if (bufSize > fileDataBuffer.Size())
4330           bufSize = fileDataBuffer.Size();
4331         if (isHashMethod)
4332         {
4333           numBlocks = 0;
4334           #ifndef Z7_ST
4335           if (numThreadsSpecified != 1)
4336             numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
4337           #endif
4338         }
4339       }
4340       usage += numThreadsSpecified * bufSize * numBlocks;
4341       Print_Usage_and_Threads(f, usage, numThreadsSpecified);
4342     }
4343 
4344     CUIntVector numThreadsVector;
4345     {
4346       unsigned nt = numThreads_Start;
4347       for (;;)
4348       {
4349         if (nt > numThreadsSpecified)
4350           break;
4351         numThreadsVector.Add(nt);
4352         const unsigned next = nt * 2;
4353         const UInt32 ntHalf= numThreadsSpecified / 2;
4354         if (ntHalf > nt && ntHalf < next)
4355           numThreadsVector.Add(ntHalf);
4356         if (numThreadsSpecified > nt && numThreadsSpecified < next)
4357           numThreadsVector.Add(numThreadsSpecified);
4358         nt = next;
4359       }
4360     }
4361 
4362     unsigned numColumns = isHashMethod ? 1 : 2;
4363     CTempValues speedTotals;
4364     CTempValues usageTotals;
4365     {
4366       const unsigned numItems = numThreadsVector.Size() * numColumns;
4367       speedTotals.Alloc(numItems);
4368       usageTotals.Alloc(numItems);
4369       for (unsigned i = 0; i < numItems; i++)
4370       {
4371         speedTotals.Values[i] = 0;
4372         usageTotals.Values[i] = 0;
4373       }
4374     }
4375 
4376     f.NewLine();
4377     for (unsigned line = 0; line < 3; line++)
4378     {
4379       f.NewLine();
4380       f.Print(line == 0 ? "THRD" : line == 1 ? "    " : "Size");
4381       FOR_VECTOR (ti, numThreadsVector)
4382       {
4383         if (ti != 0)
4384           Print_Delimiter(f);
4385         if (line == 0)
4386         {
4387           PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1));
4388           PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed);
4389         }
4390         else
4391         {
4392           for (unsigned c = 0; c < numColumns; c++)
4393           {
4394             PrintRight(f, line == 1 ? "Usage" : "%",    kFieldSize_Usage + 1);
4395             PrintRight(f, line == 1 ? "BW"    : "MB/s", kFieldSize_CrcSpeed + 1);
4396           }
4397         }
4398       }
4399     }
4400     f.NewLine();
4401 
4402     UInt64 numSteps = 0;
4403 
4404     // for (UInt32 iter = 0; iter < numIterations; iter++)
4405     // {
4406     unsigned pow = 10; // kNumHashDictBits
4407     if (startDicLog_Defined)
4408       pow = startDicLog;
4409 
4410     // #define NUM_SUB_BITS 2
4411     // pow <<= NUM_SUB_BITS;
4412     for (;; pow++)
4413     {
4414       const UInt64 bufSize = (UInt64)1 << pow;
4415       // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS);
4416       // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS);
4417 
4418       size_t dataSize = fileDataBuffer.Size();
4419       if (dataSize > bufSize || !use_fileData)
4420         dataSize = (size_t)bufSize;
4421 
4422       for (UInt32 iter = 0; iter < numIterations; iter++)
4423       {
4424         Print_Pow(f, pow);
4425         // PrintNumber(f, bufSize >> 10, 4);
4426 
4427         FOR_VECTOR (ti, numThreadsVector)
4428         {
4429           RINOK(f.CheckBreak())
4430           const UInt32 numThreads = numThreadsVector[ti];
4431           if (isHashMethod)
4432           {
4433             UInt64 speed = 0;
4434             UInt64 usage = 0;
4435             const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
4436               numThreads,
4437               dataSize, (const Byte *)fileDataBuffer,
4438               speed, usage,
4439               (UInt32)complexity,
4440               1, // benchWeight,
4441               (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
4442               method,
4443               &f,
4444             #ifndef Z7_ST
4445               &affinityMode,
4446             #endif
4447               false, // showRating
4448               NULL, false, 0);
4449             RINOK(res)
4450 
4451             if (ti != 0)
4452               Print_Delimiter(f);
4453 
4454             Bench_BW_Print_Usage_Speed(f, usage, speed);
4455             speedTotals.Values[ti] += speed;
4456             usageTotals.Values[ti] += usage;
4457           }
4458           else
4459           {
4460             {
4461               unsigned keySize = 32;
4462                    if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16;
4463               else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24;
4464               callback.BenchProps.KeySize = keySize;
4465             }
4466 
4467             COneMethodInfo method2 = method;
4468             unsigned bench_DictBits;
4469 
4470             if (benchIndex >= 0)
4471             {
4472               const CBenchMethod &bench = g_Bench[benchIndex];
4473               callback.BenchProps.EncComplex = bench.EncComplex;
4474               callback.BenchProps.DecComplexUnc = bench.DecComplexUnc;
4475               callback.BenchProps.DecComplexCompr = bench.DecComplexCompr;
4476               bench_DictBits = bench.DictBits;
4477               // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug
4478             }
4479             else
4480             {
4481               bench_DictBits = kOldLzmaDictBits; // = 32 default
4482               if (isFilter)
4483               {
4484                 const unsigned k_UnknownCoderComplexity = 4;
4485                 callback.BenchProps.EncComplex = k_UnknownCoderComplexity;
4486                 callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity;
4487               }
4488               else
4489               {
4490                 callback.BenchProps.EncComplex = 1 << 10;
4491                 callback.BenchProps.DecComplexUnc = 1 << 6;
4492               }
4493               callback.BenchProps.DecComplexCompr = 0;
4494             }
4495             callback.NeedPrint = false;
4496 
4497             if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4498             {
4499               const NCOM::CPropVariant propVariant = (UInt32)pow;
4500               RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4501             }
4502 
4503             const HRESULT res = MethodBench(
4504                 EXTERNAL_CODECS_LOC_VARS
4505                 complexInCommands,
4506               #ifndef Z7_ST
4507                 false, // oldLzmaBenchMode
4508                 numThreadsVector[ti],
4509                 &affinityMode,
4510               #endif
4511                 method2,
4512                 dataSize, (const Byte *)fileDataBuffer,
4513                 bench_DictBits,
4514                 printCallback,
4515                 &callback,
4516                 &callback.BenchProps);
4517             RINOK(res)
4518 
4519             if (ti != 0)
4520               Print_Delimiter(f);
4521 
4522             for (unsigned i = 0; i < 2; i++)
4523             {
4524               const CBenchInfo &bi = callback.BenchInfo_Results[i];
4525               const UInt64 usage = bi.GetUsage();
4526               const UInt64 speed = bi.GetUnpackSizeSpeed();
4527               usageTotals.Values[ti * 2 + i] += usage;
4528               speedTotals.Values[ti * 2 + i] += speed;
4529               Bench_BW_Print_Usage_Speed(f, usage, speed);
4530             }
4531           }
4532         }
4533 
4534         f.NewLine();
4535         numSteps++;
4536       }
4537       if (dataSize >= dict64)
4538         break;
4539     }
4540 
4541     if (numSteps != 0)
4542     {
4543       f.Print("Avg:");
4544       for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++)
4545       {
4546         if (ti != 0)
4547           Print_Delimiter(f);
4548         for (unsigned i = 0; i < numColumns; i++)
4549           Bench_BW_Print_Usage_Speed(f,
4550               usageTotals.Values[ti * numColumns + i] / numSteps,
4551               speedTotals.Values[ti * numColumns + i] / numSteps);
4552       }
4553       f.NewLine();
4554     }
4555 
4556     return S_OK;
4557   }
4558 
4559   bool use2Columns = false;
4560 
4561   bool totalBenchMode = false;
4562   bool onlyHashBench = false;
4563   if (methodName.IsEqualTo_Ascii_NoCase("hash"))
4564   {
4565     onlyHashBench = true;
4566     methodName = "*";
4567     totalBenchMode = true;
4568   }
4569   else if (methodName.Find('*') >= 0)
4570     totalBenchMode = true;
4571 
4572   // ---------- Threads loop ----------
4573   for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
4574   {
4575 
4576   UInt32 numThreads = numThreadsSpecified;
4577 
4578   if (!multiThreadTests)
4579   {
4580     if (threadsPassIndex != 0)
4581       break;
4582   }
4583   else
4584   {
4585     numThreads = 1;
4586     if (threadsPassIndex != 0)
4587     {
4588       if (numCPUs < 2)
4589         break;
4590       numThreads = numCPUs;
4591       if (threadsPassIndex == 1)
4592       {
4593         if (numCPUs >= 4)
4594           numThreads = numCPUs / 2;
4595       }
4596       else if (numCPUs < 4)
4597         break;
4598     }
4599   }
4600 
4601   IBenchPrintCallback &f = *printCallback;
4602 
4603   if (threadsPassIndex > 0)
4604   {
4605     f.NewLine();
4606     f.NewLine();
4607   }
4608 
4609   if (!dictIsDefined && !onlyHashBench)
4610   {
4611     // we use dicSizeLog and dicSizeLog_Main for data size.
4612     // also we use it to reduce dictionary size of LZMA encoder via NCoderPropID::kReduceSize.
4613     const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
4614     unsigned dicSizeLog = dicSizeLog_Main;
4615 
4616     #ifdef UNDER_CE
4617     dicSizeLog = (UInt64)1 << 20;
4618     #endif
4619 
4620     if (ramSize_Defined)
4621     for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
4622       if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
4623         break;
4624 
4625     dict = (UInt64)1 << dicSizeLog;
4626 
4627     if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
4628     {
4629       f.Print("Dictionary reduced to: ");
4630       PrintNumber(f, dicSizeLog, 1);
4631       f.NewLine();
4632     }
4633   }
4634 
4635   Print_Usage_and_Threads(f,
4636       onlyHashBench ?
4637         GetBenchMemoryUsage_Hash(numThreads, dict) :
4638         GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode),
4639       numThreads);
4640 
4641   f.NewLine();
4642 
4643   f.NewLine();
4644 
4645   if (totalBenchMode)
4646   {
4647     callback.NameFieldSize = kFieldSize_Name;
4648     use2Columns = false;
4649   }
4650   else
4651   {
4652     callback.NameFieldSize = kFieldSize_SmallName;
4653     use2Columns = true;
4654   }
4655   callback.Use2Columns = use2Columns;
4656 
4657   bool showFreq = false;
4658   UInt64 cpuFreq = 0;
4659 
4660   if (totalBenchMode)
4661   {
4662     showFreq = true;
4663   }
4664 
4665   unsigned fileldSize = kFieldSize_TotalSize;
4666   if (showFreq)
4667     fileldSize += kFieldSize_EUAndEffec;
4668 
4669   if (use2Columns)
4670   {
4671     PrintSpaces(f, callback.NameFieldSize);
4672     PrintRight(f, "Compressing", fileldSize);
4673     f.Print(kSep);
4674     PrintRight(f, "Decompressing", fileldSize);
4675   }
4676   f.NewLine();
4677   PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
4678 
4679   int j;
4680 
4681   for (j = 0; j < 2; j++)
4682   {
4683     PrintRight(f, "Speed", kFieldSize_Speed + 1);
4684     PrintRight(f, "Usage", kFieldSize_Usage + 1);
4685     PrintRight(f, "R/U", kFieldSize_RU + 1);
4686     PrintRight(f, "Rating", kFieldSize_Rating + 1);
4687     if (showFreq)
4688     {
4689       PrintRight(f, "E/U", kFieldSize_EU + 1);
4690       PrintRight(f, "Effec", kFieldSize_Effec + 1);
4691     }
4692     if (!use2Columns)
4693       break;
4694     if (j == 0)
4695       f.Print(kSep);
4696   }
4697 
4698   f.NewLine();
4699   PrintSpaces(f, callback.NameFieldSize);
4700 
4701   for (j = 0; j < 2; j++)
4702   {
4703     PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
4704     PrintRight(f, "%", kFieldSize_Usage + 1);
4705     PrintRight(f, "MIPS", kFieldSize_RU + 1);
4706     PrintRight(f, "MIPS", kFieldSize_Rating + 1);
4707     if (showFreq)
4708     {
4709       PrintRight(f, "%", kFieldSize_EU + 1);
4710       PrintRight(f, "%", kFieldSize_Effec + 1);
4711     }
4712     if (!use2Columns)
4713       break;
4714     if (j == 0)
4715       f.Print(kSep);
4716   }
4717 
4718   f.NewLine();
4719   f.NewLine();
4720 
4721   if (specifiedFreq != 0)
4722     cpuFreq = specifiedFreq;
4723 
4724   // bool showTotalSpeed = false;
4725 
4726   if (totalBenchMode)
4727   {
4728     for (UInt32 i = 0; i < numIterations; i++)
4729     {
4730       if (i != 0)
4731         printCallback->NewLine();
4732 
4733       const unsigned kNumCpuTests = 3;
4734       for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
4735       {
4736         PrintLeft(f, "CPU", kFieldSize_Name);
4737 
4738         // UInt32 resVal;
4739 
4740         CFreqBench fb;
4741         fb.complexInCommands = complexInCommands;
4742         fb.numThreads = numThreads;
4743         // showFreq;
4744         fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4745         fb.specifiedFreq = specifiedFreq;
4746 
4747         const HRESULT res = fb.FreqBench(printCallback
4748             #ifndef Z7_ST
4749               , &affinityMode
4750             #endif
4751             );
4752         RINOK(res)
4753 
4754         cpuFreq = fb.CpuFreqRes;
4755         callback.NewLine();
4756 
4757         if (specifiedFreq != 0)
4758           cpuFreq = specifiedFreq;
4759 
4760         if (testTimeMs >= 1000)
4761         if (freqTest == kNumCpuTests - 1)
4762         {
4763           // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands);
4764         }
4765       }
4766       callback.NewLine();
4767 
4768       // return S_OK; // change it
4769 
4770       callback.SetFreq(true, cpuFreq);
4771 
4772       if (!onlyHashBench)
4773       {
4774         size_t dataSize = (size_t)dict;
4775         if (use_fileData)
4776         {
4777           dataSize = fileDataBuffer.Size();
4778           if (dictIsDefined && dataSize > dict)
4779             dataSize = (size_t)dict;
4780         }
4781 
4782         const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
4783             method, complexInCommands,
4784           #ifndef Z7_ST
4785             numThreads,
4786             &affinityMode,
4787           #endif
4788             dictIsDefined || use_fileData, // forceUnpackSize
4789             dataSize,
4790             (const Byte *)fileDataBuffer,
4791             printCallback, &callback);
4792         RINOK(res)
4793       }
4794 
4795       {
4796         size_t dataSize = (size_t)1 << kNumHashDictBits;
4797         if (dictIsDefined)
4798         {
4799           dataSize = (size_t)dict;
4800           if (dataSize != dict)
4801             return E_OUTOFMEMORY;
4802         }
4803         if (use_fileData)
4804         {
4805           dataSize = fileDataBuffer.Size();
4806           if (dictIsDefined && dataSize > dict)
4807             dataSize = (size_t)dict;
4808         }
4809 
4810         const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS
4811             method, complexInCommands,
4812             numThreads,
4813             dataSize, (const Byte *)fileDataBuffer,
4814             printCallback, &callback,
4815         #ifndef Z7_ST
4816           &affinityMode,
4817         #endif
4818           &callback.EncodeRes, true, cpuFreq);
4819         RINOK(res)
4820       }
4821 
4822       callback.NewLine();
4823       {
4824         PrintLeft(f, "CPU", kFieldSize_Name);
4825 
4826         CFreqBench fb;
4827         fb.complexInCommands = complexInCommands;
4828         fb.numThreads = numThreads;
4829         // showFreq;
4830         fb.showFreq = (specifiedFreq != 0);
4831         fb.specifiedFreq = specifiedFreq;
4832 
4833         const HRESULT res = fb.FreqBench(printCallback
4834           #ifndef Z7_ST
4835             , &affinityMode
4836           #endif
4837           );
4838         RINOK(res)
4839         callback.NewLine();
4840       }
4841     }
4842   }
4843   else
4844   {
4845     needSetComplexity = true;
4846     if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
4847     {
4848       unsigned i;
4849       for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4850       {
4851         const CBenchMethod &h = g_Bench[i];
4852         AString benchMethod (h.Name);
4853         AString benchProps;
4854         const int propPos = benchMethod.Find(':');
4855         if (propPos >= 0)
4856         {
4857           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4858           benchMethod.DeleteFrom((unsigned)propPos);
4859         }
4860 
4861         if (AreSameMethodNames(benchMethod, methodName))
4862         {
4863           if (benchProps.IsEmpty()
4864               || (benchProps == "x5" && method.PropsString.IsEmpty())
4865               || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
4866           {
4867             callback.BenchProps.EncComplex = h.EncComplex;
4868             callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
4869             callback.BenchProps.DecComplexUnc = h.DecComplexUnc;
4870             needSetComplexity = false;
4871             break;
4872           }
4873         }
4874       }
4875       /*
4876       if (i == Z7_ARRAY_SIZE(g_Bench))
4877         return E_NOTIMPL;
4878       */
4879     }
4880     if (needSetComplexity)
4881       callback.BenchProps.SetLzmaCompexity();
4882 
4883   if (startDicLog < kBenchMinDicLogSize)
4884     startDicLog = kBenchMinDicLogSize;
4885 
4886   for (unsigned i = 0; i < numIterations; i++)
4887   {
4888     unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
4889     if (!multiDict)
4890       pow = 32;
4891     while (GetDictSizeFromLog(pow) > dict && pow > 0)
4892       pow--;
4893     for (; GetDictSizeFromLog(pow) <= dict; pow++)
4894     {
4895       Print_Pow(f, pow);
4896       callback.DictSize = (UInt64)1 << pow;
4897 
4898       COneMethodInfo method2 = method;
4899 
4900       if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4901       {
4902         // We add dictionary size property.
4903         // method2 can have two different dictionary size properties.
4904         // And last property is main.
4905         NCOM::CPropVariant propVariant = (UInt32)pow;
4906         RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4907       }
4908 
4909       size_t uncompressedDataSize;
4910       if (use_fileData)
4911       {
4912         uncompressedDataSize = fileDataBuffer.Size();
4913       }
4914       else
4915       {
4916         uncompressedDataSize = (size_t)callback.DictSize;
4917         if (uncompressedDataSize != callback.DictSize)
4918           return E_OUTOFMEMORY;
4919         if (uncompressedDataSize >= (1 << 18))
4920           uncompressedDataSize += kAdditionalSize;
4921       }
4922 
4923       const HRESULT res = MethodBench(
4924           EXTERNAL_CODECS_LOC_VARS
4925           complexInCommands,
4926         #ifndef Z7_ST
4927           true, numThreads,
4928           &affinityMode,
4929         #endif
4930           method2,
4931           uncompressedDataSize, (const Byte *)fileDataBuffer,
4932           kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
4933       f.NewLine();
4934       RINOK(res)
4935       if (!multiDict)
4936         break;
4937     }
4938   }
4939   }
4940 
4941   PrintChars(f, '-', callback.NameFieldSize + fileldSize);
4942 
4943   if (use2Columns)
4944   {
4945     f.Print(kSep);
4946     PrintChars(f, '-', fileldSize);
4947   }
4948 
4949   f.NewLine();
4950 
4951   if (use2Columns)
4952   {
4953     PrintLeft(f, "Avr:", callback.NameFieldSize);
4954     PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
4955     f.Print(kSep);
4956     PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
4957     f.NewLine();
4958   }
4959 
4960   PrintLeft(f, "Tot:", callback.NameFieldSize);
4961   CTotalBenchRes midRes;
4962   midRes = callback.EncodeRes;
4963   midRes.Update_With_Res(callback.DecodeRes);
4964 
4965   // midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
4966   PrintTotals(f, showFreq, cpuFreq, false, midRes);
4967   f.NewLine();
4968 
4969   }
4970   return S_OK;
4971 }
4972