1 // Bench.cpp
2
3 #include "StdAfx.h"
4
5 // #include <stdio.h>
6
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif // _WIN32
11
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #include <unistd.h>
15 #ifdef USE_POSIX_TIME2
16 #include <sys/time.h>
17 #include <sys/times.h>
18 #endif
19 #endif // USE_POSIX_TIME
20
21 #ifdef _WIN32
22 #define USE_ALLOCA
23 #endif
24
25 #ifdef USE_ALLOCA
26 #ifdef _WIN32
27 #include <malloc.h>
28 #else
29 #include <stdlib.h>
30 #endif
31 #define BENCH_ALLOCA_VALUE(index) (((index) * 64 * 21) & 0x7FF)
32 #endif
33
34 #include "../../../../C/7zCrc.h"
35 #include "../../../../C/RotateDefs.h"
36 #include "../../../../C/CpuArch.h"
37
38 #ifndef Z7_ST
39 #include "../../../Windows/Synchronization.h"
40 #include "../../../Windows/Thread.h"
41 #endif
42
43 #include "../../../Windows/FileFind.h"
44 #include "../../../Windows/FileIO.h"
45 #include "../../../Windows/SystemInfo.h"
46
47 #include "../../../Common/MyBuffer2.h"
48 #include "../../../Common/IntToString.h"
49 #include "../../../Common/StringConvert.h"
50 #include "../../../Common/StringToInt.h"
51 #include "../../../Common/Wildcard.h"
52
53 #include "../../Common/MethodProps.h"
54 #include "../../Common/StreamObjects.h"
55 #include "../../Common/StreamUtils.h"
56
57 #include "Bench.h"
58
59 using namespace NWindows;
60
#ifndef Z7_ST
// Numeric method id; only compiled in multi-threaded builds.
// NOTE(review): presumably the 7-Zip method id of LZMA - confirm at the use site.
static const UInt32 k_LZMA = 0x030101;
#endif

// Default complexity budget in "commands" that SetComplexCommandsMs() starts from:
// 2^31 under Windows CE, 2^34 elsewhere.
static const UInt64 kComplexInCommands = (UInt64)1 <<
  #ifdef UNDER_CE
    31;
  #else
    34;
  #endif

// Time budget in milliseconds.
// NOTE(review): presumably the default passed as complexInMs to SetComplexCommandsMs() - confirm.
static const UInt32 kComplexInMs = 4000;
73
SetComplexCommandsMs(UInt32 complexInMs,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)74 static void SetComplexCommandsMs(UInt32 complexInMs,
75 bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
76 {
77 complexInCommands = kComplexInCommands;
78 const UInt64 kMinFreq = (UInt64)1000000 * 4;
79 const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
80 if (cpuFreq < kMinFreq && !isSpecifiedFreq)
81 cpuFreq = kMinFreq;
82 if (cpuFreq < kMaxFreq || isSpecifiedFreq)
83 {
84 if (complexInMs != 0)
85 complexInCommands = complexInMs * cpuFreq / 1000;
86 else
87 complexInCommands = cpuFreq >> 2;
88 }
89 }
90
// const UInt64 kBenchmarkUsageMult = 1000000; // for debug
// CPU usage is stored as a fixed-point value: a usage equal to
// kBenchmarkUsageMult (1 << 16) is reported as 100 percent
// by Benchmark_GetUsage_Percents().
static const unsigned kBenchmarkUsageMultBits = 16;
static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;
94
Benchmark_GetUsage_Percents(UInt64 usage)95 UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
96 {
97 return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult;
98 }
99
// NOTE(review): the users of the next three constants are outside this part of the file;
// the names suggest hash-benchmark dictionary bits, filter-benchmark data size (47 KiB)
// and a legacy LZMA dictionary-bits limit - confirm at the use sites.
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test

static const unsigned kOldLzmaDictBits = 32;

// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
static const size_t kAdditionalSize = (size_t)1 << 16;
// fixed part of the compressed-buffer size computed by GetBenchCompressedSize()
static const size_t kCompressedAdditionalSize = 1 << 10;

// capacity of the buffer (CEncoderInfo::propsData) that receives written coder properties
static const UInt32 kMaxMethodPropSize = 1 << 6;
110
111
/*
  ALLOC_WITH_HRESULT(buffer_pointer, size):
    calls buffer->Alloc(size) and returns E_OUTOFMEMORY from the enclosing
    function, if a non-zero size was requested but the buffer is not allocated.
  Notes:
    - the macro expands to a braced block, so it is used without a trailing ';'
    - (_size_) is evaluated twice: do not pass an expression with side effects
*/
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { (_buffer_)->Alloc(_size_); \
  if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
115
116
117 class CBaseRandomGenerator
118 {
119 UInt32 A1;
120 UInt32 A2;
121 UInt32 Salt;
122 public:
CBaseRandomGenerator(UInt32 salt=0)123 CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
Init()124 void Init() { A1 = 362436069; A2 = 521288629;}
125 Z7_FORCE_INLINE
GetRnd()126 UInt32 GetRnd()
127 {
128 #if 0
129 // for debug:
130 return 0x0c080400;
131 // return 0;
132 #else
133 return Salt ^
134 (
135 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
136 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
137 );
138 #endif
139 }
140 };
141
142
143 static const size_t k_RandBuf_AlignMask = 4 - 1;
144
145 Z7_NO_INLINE
RandGen_BufAfterPad(Byte * buf,size_t size)146 static void RandGen_BufAfterPad(Byte *buf, size_t size)
147 {
148 CBaseRandomGenerator RG;
149 for (size_t i = 0; i < size; i += 4)
150 {
151 const UInt32 v = RG.GetRnd();
152 SetUi32a(buf + i, v)
153 }
154 /*
155 UInt32 v = RG.GetRnd();
156 for (; i < size; i++)
157 {
158 buf[i] = (Byte)v;
159 v >>= 8;
160 }
161 */
162 }
163
164
/*
  Buffer that can fill itself with deterministic benchmark data:
    GenerateSimpleRandom() : plain pseudo-random bytes
    GenerateLz()           : mix of random literal bytes and copies of earlier
                             data (matches)
  The buffer must be allocated (Alloc()) before a Generate* call:
  both functions fill exactly Size() bytes.
*/
class CBenchRandomGenerator: public CMidAlignedBuffer
{
  // extracts the (numBits) low bits of (res) and removes them from (res)
  static UInt32 GetVal(UInt32 &res, unsigned numBits)
  {
    const UInt32 val = res & (((UInt32)1 << numBits) - 1);
    res >>= numBits;
    return val;
  }

  // reads a 2-bit selector (len), then returns a value of (1 + len) bits: range is [0, 15]
  static UInt32 GetLen(UInt32 &r)
  {
    const unsigned len = (unsigned)GetVal(r, 2);
    return GetVal(r, 1 + len);
  }

public:

  // fills the buffer with the low byte of successive generator outputs
  void GenerateSimpleRandom(UInt32 salt)
  {
    CBaseRandomGenerator rg(salt);
    const size_t bufSize = Size();
    Byte *buf = (Byte *)*this;
    for (size_t i = 0; i < bufSize; i++)
      buf[i] = (Byte)rg.GetRnd();
  }

  /*
    Fills the buffer with LZ-like data.
      dictBits : limits the number of bits of the match distance
      salt     : salt for the random generator
    The order of rg.GetRnd() calls defines the generated data,
    so statements here must not be reordered.
  */
  void GenerateLz(unsigned dictBits, UInt32 salt)
  {
    CBaseRandomGenerator rg(salt);
    size_t pos = 0;
    size_t rep0 = 1;  // current match distance; reused if a new distance is not generated
    const size_t bufSize = Size();
    Byte *buf = (Byte *)*this;
    unsigned posBits = 1;

    // printf("\n dictBits = %d\n", (UInt32)dictBits);
    // printf("\n bufSize = 0x%p\n", (const void *)bufSize);

    while (pos < bufSize)
    {
      /*
      if (pos >= ((UInt32)1 << 31))
        printf(" %x\n", pos);
      */
      UInt32 r = rg.GetRnd();
      // the first 1024 bytes are always literals; after that: literal or match by 1 random bit
      if (GetVal(r, 1) == 0 || pos < 1024)
        buf[pos++] = (Byte)(r & 0xFF);
      else
      {
        UInt32 len;
        len = 1 + GetLen(r);

        // with probability 7/8 we generate a new distance (and a longer length)
        if (GetVal(r, 3) != 0)
        {
          len += GetLen(r);

          // posBits only grows: smallest tracked value with ((size_t)1 << posBits) >= pos
          while (((size_t)1 << posBits) < pos)
            posBits++;

          unsigned numBitsMax = dictBits;
          if (numBitsMax > posBits)
            numBitsMax = posBits;

          const unsigned kAddBits = 6;
          unsigned numLogBits = 5;
          if (numBitsMax <= (1 << 4) - 1 + kAddBits)
            numLogBits = 4;

          // rejection loop: select the number of distance bits (ppp),
          // then a distance that is smaller than (pos)
          for (;;)
          {
            const UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
            r = rg.GetRnd();
            if (ppp > numBitsMax)
              continue;
            // rep0 = GetVal(r, ppp);
            rep0 = r & (((size_t)1 << ppp) - 1);
            if (rep0 < pos)
              break;
            r = rg.GetRnd();
          }
          rep0++;
        }

        // len *= 300; // for debug
        {
          // we don't write after the end of the buffer
          const size_t rem = bufSize - pos;
          if (len > rem)
            len = (UInt32)rem;
        }
        Byte *dest = buf + pos;
        const Byte *src = dest - rep0;
        pos += len;
        // byte-by-byte copy: source and destination can overlap, if (rep0 < len)
        for (UInt32 i = 0; i < len; i++)
          *dest++ = *src++;
      }
    }
    // printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
  }
};
264
265
266 Z7_CLASS_IMP_NOQIB_1(
267 CBenchmarkInStream
268 , ISequentialInStream
269 )
270 const Byte *Data;
271 size_t Pos;
272 size_t Size;
273 public:
274 void Init(const Byte *data, size_t size)
275 {
276 Data = data;
277 Size = size;
278 Pos = 0;
279 }
280 bool WasFinished() const { return Pos == Size; }
281 };
282
283 Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize))
284 {
285 const UInt32 kMaxBlockSize = (1 << 20);
286 if (size > kMaxBlockSize)
287 size = kMaxBlockSize;
288 const size_t remain = Size - Pos;
289 if (size > remain)
290 size = (UInt32)remain;
291
292 if (size)
293 memcpy(data, Data + Pos, size);
294
295 Pos += size;
296 if (processedSize)
297 *processedSize = size;
298 return S_OK;
299 }
300
301
/*
  Sequential output stream that writes to its own buffer (CMidAlignedBuffer base).
  The buffer must be allocated by the caller; Write() never grows it.
  Depending on flags, written data is copied to the buffer and / or
  is used to update a CRC state.
*/
class CBenchmarkOutStream Z7_final:
  public ISequentialOutStream,
  public CMyUnknownImp,
  public CMidAlignedBuffer
{
  Z7_COM_UNKNOWN_IMP_0
  Z7_IFACE_COM7_IMP(ISequentialOutStream)
  // bool _overflow;
public:
  size_t Pos;     // number of bytes accepted by Write() after Init()
  bool RealCopy;  // if true, Write() copies data to the buffer
  bool CalcCrc;   // if true, Write() updates (Crc)
  UInt32 Crc;     // running CRC state (starts from CRC_INIT_VAL)

  // CBenchmarkOutStream(): _overflow(false) {}
  // sets modes and resets the position and the CRC state. It must be called before Write()
  void Init(bool realCopy, bool calcCrc)
  {
    Crc = CRC_INIT_VAL;
    RealCopy = realCopy;
    CalcCrc = calcCrc;
    // _overflow = false;
    Pos = 0;
  }

  // resets only the CRC state
  void InitCrc()
  {
    Crc = CRC_INIT_VAL;
  }

  // updates the CRC state with (data, size)
  void Calc(const void *data, size_t size)
  {
    Crc = CrcUpdate(Crc, data, size);
  }

  size_t GetPos() const { return Pos; }

  // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
};
340
341 Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
342 {
343 size_t curSize = Size() - Pos;
344 if (curSize > size)
345 curSize = size;
346 if (curSize != 0)
347 {
348 if (RealCopy)
349 memcpy(((Byte *)*this) + Pos, data, curSize);
350 if (CalcCrc)
351 Calc(data, curSize);
352 Pos += curSize;
353 }
354 if (processedSize)
355 *processedSize = (UInt32)curSize;
356 if (curSize != size)
357 {
358 // _overflow = true;
359 return E_FAIL;
360 }
361 return S_OK;
362 }
363
364
365 Z7_CLASS_IMP_NOQIB_1(
366 CCrcOutStream
367 , ISequentialOutStream
368 )
369 public:
370 bool CalcCrc;
371 UInt32 Crc;
372 UInt64 Pos;
373
374 CCrcOutStream(): CalcCrc(true) {}
375 void Init() { Crc = CRC_INIT_VAL; Pos = 0; }
376 void Calc(const void *data, size_t size)
377 {
378 Crc = CrcUpdate(Crc, data, size);
379 }
380 };
381
382 Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
383 {
384 if (CalcCrc)
385 Calc(data, size);
386 Pos += size;
387 if (processedSize)
388 *processedSize = size;
389 return S_OK;
390 }
391
392 // #include "../../../../C/My_sys_time.h"
393
394 static UInt64 GetTimeCount()
395 {
396 #ifdef USE_POSIX_TIME
397 #ifdef USE_POSIX_TIME2
398 timeval v;
399 if (gettimeofday(&v, NULL) == 0)
400 return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec;
401 return (UInt64)time(NULL) * 1000000;
402 #else
403 return time(NULL);
404 #endif
405 #else
406 LARGE_INTEGER value;
407 if (::QueryPerformanceCounter(&value))
408 return (UInt64)value.QuadPart;
409 return GetTickCount();
410 #endif
411 }
412
413 static UInt64 GetFreq()
414 {
415 #ifdef USE_POSIX_TIME
416 #ifdef USE_POSIX_TIME2
417 return 1000000;
418 #else
419 return 1;
420 #endif
421 #else
422 LARGE_INTEGER value;
423 if (::QueryPerformanceFrequency(&value))
424 return (UInt64)value.QuadPart;
425 return 1000;
426 #endif
427 }
428
429
430 #ifdef USE_POSIX_TIME
431
432 struct CUserTime
433 {
434 UInt64 Sum;
435 clock_t Prev;
436
437 void Init()
438 {
439 // Prev = clock();
440 Sum = 0;
441 Prev = 0;
442 Update();
443 Sum = 0;
444 }
445
446 void Update()
447 {
448 tms t;
449 /* clock_t res = */ times(&t);
450 clock_t newVal = t.tms_utime + t.tms_stime;
451 Sum += (UInt64)(newVal - Prev);
452 Prev = newVal;
453
454 /*
455 clock_t v = clock();
456 if (v != -1)
457 {
458 Sum += v - Prev;
459 Prev = v;
460 }
461 */
462 }
463 UInt64 GetUserTime()
464 {
465 Update();
466 return Sum;
467 }
468 };
469
470 #else
471
472
/*
  Accumulates CPU time (user + kernel) in 100-nanosecond units
  (GetUserFreq() returns 10000000 for this build).
  If the system call for CPU times fails, the object switches to
  GetTickCount() deltas (UseTick mode) for the rest of its life.
*/
struct CUserTime
{
  bool UseTick;     // true: CPU times are not available, tick counter is used instead
  DWORD Prev_Tick;  // GetTickCount() value of the previous Update()
  UInt64 Prev;      // previous (user + kernel) time value
  UInt64 Sum;       // time accumulated after Init()

  // starts a new measurement: the first Update() only sets previous values
  void Init()
  {
    UseTick = false;
    Prev_Tick = 0;
    Prev = 0;
    Sum = 0;
    Update();
    Sum = 0;
  }
  // samples the current time and returns the accumulated value
  UInt64 GetUserTime()
  {
    Update();
    return Sum;
  }
  void Update();
};
496
497 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
498
499 void CUserTime::Update()
500 {
501 DWORD new_Tick = GetTickCount();
502 FILETIME creationTime, exitTime, kernelTime, userTime;
503 if (!UseTick &&
504 #ifdef UNDER_CE
505 ::GetThreadTimes(::GetCurrentThread()
506 #else
507 ::GetProcessTimes(::GetCurrentProcess()
508 #endif
509 , &creationTime, &exitTime, &kernelTime, &userTime))
510 {
511 UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime);
512 Sum += newVal - Prev;
513 Prev = newVal;
514 }
515 else
516 {
517 UseTick = true;
518 Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000;
519 }
520 Prev_Tick = new_Tick;
521 }
522
523
524 #endif
525
526 static UInt64 GetUserFreq()
527 {
528 #ifdef USE_POSIX_TIME
529 // return CLOCKS_PER_SEC;
530 return (UInt64)sysconf(_SC_CLK_TCK);
531 #else
532 return 10000000;
533 #endif
534 }
535
536 class CBenchProgressStatus Z7_final
537 {
538 #ifndef Z7_ST
539 NSynchronization::CCriticalSection CS;
540 #endif
541 public:
542 HRESULT Res;
543 bool EncodeMode;
544 void SetResult(HRESULT res)
545 {
546 #ifndef Z7_ST
547 NSynchronization::CCriticalSectionLock lock(CS);
548 #endif
549 Res = res;
550 }
551 HRESULT GetResult()
552 {
553 #ifndef Z7_ST
554 NSynchronization::CCriticalSectionLock lock(CS);
555 #endif
556 return Res;
557 }
558 };
559
/*
  Time measurement helper:
    BenchInfo : frequencies and the start value of the wall-clock counter
                (set by SetStartTime())
    UserTime  : CPU time accumulator
*/
struct CBenchInfoCalc
{
  CBenchInfo BenchInfo;
  CUserTime UserTime;

  // stores frequencies and the start time, and resets the CPU time accumulator
  void SetStartTime();
  // writes a copy of BenchInfo with elapsed wall-clock / CPU times to (dest)
  void SetFinishTime(CBenchInfo &dest);
};
568
569 void CBenchInfoCalc::SetStartTime()
570 {
571 BenchInfo.GlobalFreq = GetFreq();
572 BenchInfo.UserFreq = GetUserFreq();
573 BenchInfo.GlobalTime = ::GetTimeCount();
574 BenchInfo.UserTime = 0;
575 UserTime.Init();
576 }
577
578 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
579 {
580 dest = BenchInfo;
581 dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
582 dest.UserTime = UserTime.GetUserTime();
583 }
584
/*
  ICompressProgressInfo implementation for benchmark coders:
  SetRatioInfo() converts progress calls of a coder to
  IBenchCallback::SetEncodeResult() / SetDecodeResult() calls.
*/
class CBenchProgressInfo Z7_final:
  public ICompressProgressInfo,
  public CMyUnknownImp,
  public CBenchInfoCalc
{
  Z7_COM_UNKNOWN_IMP_0
  Z7_IFACE_COM7_IMP(ICompressProgressInfo)
public:
  // shared status; it is not set by the constructor,
  // and it must be set before the first SetRatioInfo() call
  CBenchProgressStatus *Status;
  // can be NULL: then SetRatioInfo() only returns the current status
  IBenchCallback *Callback;

  CBenchProgressInfo(): Callback(NULL) {}
};
598
599
600 Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize))
601 {
602 HRESULT res = Status->GetResult();
603 if (res != S_OK)
604 return res;
605 if (!Callback)
606 return res;
607
608 /*
609 static UInt64 inSizePrev = 0;
610 static UInt64 outSizePrev = 0;
611 UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
612 if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; }
613 if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; }
614 UInt64 percents = delta2 * 1000;
615 if (delta1 != 0)
616 percents /= delta1;
617 printf("=== %7d %7d %7d %7d ratio = %4d\n",
618 (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
619 (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
620 (unsigned)percents);
621 */
622
623 CBenchInfo info;
624 SetFinishTime(info);
625 if (Status->EncodeMode)
626 {
627 info.UnpackSize = BenchInfo.UnpackSize + *inSize;
628 info.PackSize = BenchInfo.PackSize + *outSize;
629 res = Callback->SetEncodeResult(info, false);
630 }
631 else
632 {
633 info.PackSize = BenchInfo.PackSize + *inSize;
634 info.UnpackSize = BenchInfo.UnpackSize + *outSize;
635 res = Callback->SetDecodeResult(info, false);
636 }
637 if (res != S_OK)
638 Status->SetResult(res);
639 return res;
640 }
641
// number of fractional bits in the fixed-point logarithm returned by GetLogSize_Sub()
static const unsigned kSubBits = 8;
643
644 static unsigned GetLogSize(UInt64 size)
645 {
646 unsigned i = 0;
647 for (;;)
648 {
649 i++; size >>= 1; if (size == 0) break;
650 }
651 return i;
652 }
653
654
655 static UInt32 GetLogSize_Sub(UInt64 size)
656 {
657 if (size <= 1)
658 return 0;
659 const unsigned i = GetLogSize(size) - 1;
660 UInt32 v;
661 if (i <= kSubBits)
662 v = (UInt32)(size) << (kSubBits - i);
663 else
664 v = (UInt32)(size >> (i - kSubBits));
665 return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1));
666 }
667
668
669 static UInt64 Get_UInt64_from_double(double v)
670 {
671 const UInt64 kMaxVal = (UInt64)1 << 62;
672 if (v > (double)(Int64)kMaxVal)
673 return kMaxVal;
674 return (UInt64)v;
675 }
676
677 static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d)
678 {
679 if (d == 0)
680 d = 1;
681 const double v =
682 (double)(Int64)m1 *
683 (double)(Int64)m2 /
684 (double)(Int64)d;
685 return Get_UInt64_from_double(v);
686 /*
687 unsigned n1 = GetLogSize(m1);
688 unsigned n2 = GetLogSize(m2);
689 while (n1 + n2 > 64)
690 {
691 if (n1 >= n2)
692 {
693 m1 >>= 1;
694 n1--;
695 }
696 else
697 {
698 m2 >>= 1;
699 n2--;
700 }
701 d >>= 1;
702 }
703
704 if (d == 0)
705 d = 1;
706 return m1 * m2 / d;
707 */
708 }
709
710
711 UInt64 CBenchInfo::GetUsage() const
712 {
713 UInt64 userTime = UserTime;
714 UInt64 userFreq = UserFreq;
715 UInt64 globalTime = GlobalTime;
716 UInt64 globalFreq = GlobalFreq;
717
718 if (userFreq == 0)
719 userFreq = 1;
720 if (globalTime == 0)
721 globalTime = 1;
722
723 const double v =
724 ((double)(Int64)userTime / (double)(Int64)userFreq)
725 * ((double)(Int64)globalFreq / (double)(Int64)globalTime)
726 * (double)(Int64)kBenchmarkUsageMult;
727 return Get_UInt64_from_double(v);
728 /*
729 return MyMultDiv64(
730 MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq),
731 globalFreq, globalTime);
732 */
733 }
734
735
736 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
737 {
738 if (UserTime == 0)
739 {
740 return 0;
741 // userTime = 1;
742 }
743 UInt64 globalFreq = GlobalFreq;
744 if (globalFreq == 0)
745 globalFreq = 1;
746
747 const double v =
748 ((double)(Int64)GlobalTime / (double)(Int64)globalFreq)
749 * ((double)(Int64)UserFreq / (double)(Int64)UserTime)
750 * (double)(Int64)rating;
751 return Get_UInt64_from_double(v);
752 /*
753 return MyMultDiv64(
754 MyMultDiv64(rating, UserFreq, UserTime),
755 GlobalTime, globalFreq);
756 */
757 }
758
759
760 UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
761 {
762 return MyMultDiv64(numUnits, GlobalFreq, GlobalTime);
763 }
764
765 static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity)
766 {
767 return complexity >= 0 ?
768 size * (UInt32)complexity :
769 size / (UInt32)(-complexity);
770 }
771
772 struct CBenchProps
773 {
774 bool LzmaRatingMode;
775
776 Int32 EncComplex;
777 Int32 DecComplexCompr;
778 Int32 DecComplexUnc;
779
780 unsigned KeySize;
781
782 CBenchProps():
783 LzmaRatingMode(false),
784 KeySize(0)
785 {}
786
787 void SetLzmaCompexity();
788
789 UInt64 GetNumCommands_Enc(UInt64 unpackSize) const
790 {
791 const UInt32 kMinSize = 100;
792 if (unpackSize < kMinSize)
793 unpackSize = kMinSize;
794 return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex);
795 }
796
797 UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const
798 {
799 return
800 GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) +
801 GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc);
802 }
803
804 UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const;
805 UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const;
806 };
807
808 void CBenchProps::SetLzmaCompexity()
809 {
810 EncComplex = 1200;
811 DecComplexUnc = 4;
812 DecComplexCompr = 190;
813 LzmaRatingMode = true;
814 }
815
816 UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const
817 {
818 if (dictSize < (1 << kBenchMinDicLogSize))
819 dictSize = (1 << kBenchMinDicLogSize);
820 Int32 encComplex = EncComplex;
821 if (LzmaRatingMode)
822 {
823 /*
824 for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
825 {
826 unsigned rr = GetLogSize_Sub(uu);
827 printf("\n%16I64x , log = %4x", uu, rr);
828 uu += 1;
829 uu += uu / 50;
830 }
831 */
832 // throw 1;
833 const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits);
834 encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
835 }
836 const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex);
837 return MyMultDiv64(numCommands, freq, elapsedTime);
838 }
839
840 UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const
841 {
842 const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations;
843 return MyMultDiv64(numCommands, freq, elapsedTime);
844 }
845
846
847
848 UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
849 {
850 CBenchProps props;
851 props.SetLzmaCompexity();
852 return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
853 }
854
855 UInt64 CBenchInfo::GetRating_LzmaDec() const
856 {
857 CBenchProps props;
858 props.SetLzmaCompexity();
859 return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
860 }
861
862
863 #ifndef Z7_ST
864
// capacity of CAffinityMode::Sizes: SetLevels() adds not more than 3 items
#define NUM_CPU_LEVELS_MAX 3

/*
  Settings for binding benchmark threads to CPUs.
  Affinity is used only if (NumBundleThreads != 0).
  Only NumBundleThreads, NumLevels and NumCoreThreads are set by the constructor;
  NumCores and Sizes are set by SetLevels().
*/
struct CAffinityMode
{
  unsigned NumBundleThreads;  // number of consecutive CPUs in the mask of one bundle; 0 = no affinity
  unsigned NumLevels;         // number of used items in Sizes[]
  unsigned NumCoreThreads;
  unsigned NumCores;
  // unsigned DivideNum;
  UInt32 Sizes[NUM_CPU_LEVELS_MAX]; // factors computed by SetLevels()

  void SetLevels(unsigned numCores, unsigned numCoreThreads);
  DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;
  bool NeedAffinity() const { return NumBundleThreads != 0; }

  // creates a thread: with the CPU set of (bundleIndex), if affinity is used,
  // or as a usual thread otherwise
  WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
  {
    if (NeedAffinity())
    {
      CCpuSet cpuSet;
      GetAffinityMask(bundleIndex, &cpuSet);
      return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet);
    }
    return thread.Create(startAddress, parameter);
  }

  CAffinityMode():
    NumBundleThreads(0),
    NumLevels(0),
    NumCoreThreads(1)
    // DivideNum(1)
      {}
};
898
899 void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads)
900 {
901 NumCores = numCores;
902 NumCoreThreads = numCoreThreads;
903 NumLevels = 0;
904 if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0)
905 return;
906 UInt32 c = numCores / numCoreThreads;
907 UInt32 c2 = 1;
908 while ((c & 1) == 0)
909 {
910 c >>= 1;
911 c2 <<= 1;
912 }
913 if (c2 != 1)
914 Sizes[NumLevels++] = c2;
915 if (c != 1)
916 Sizes[NumLevels++] = c;
917 if (numCoreThreads != 1)
918 Sizes[NumLevels++] = numCoreThreads;
919 if (NumLevels == 0)
920 Sizes[NumLevels++] = 1;
921
922 /*
923 printf("\n Cores:");
924 for (unsigned i = 0; i < NumLevels; i++)
925 {
926 printf(" %d", Sizes[i]);
927 }
928 printf("\n");
929 */
930 }
931
932
933 DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const
934 {
935 CpuSet_Zero(cpuSet);
936
937 if (NumLevels == 0)
938 return 0;
939
940 // printf("\n%2d", bundleIndex);
941
942 /*
943 UInt32 low = 0;
944 if (DivideNum != 1)
945 {
946 low = bundleIndex % DivideNum;
947 bundleIndex /= DivideNum;
948 }
949 */
950
951 UInt32 numGroups = NumCores / NumBundleThreads;
952 UInt32 m = bundleIndex % numGroups;
953 UInt32 v = 0;
954 for (unsigned i = 0; i < NumLevels; i++)
955 {
956 UInt32 size = Sizes[i];
957 while ((size & 1) == 0)
958 {
959 v *= 2;
960 v |= (m & 1);
961 m >>= 1;
962 size >>= 1;
963 }
964 v *= size;
965 v += m % size;
966 m /= size;
967 }
968
969 // UInt32 nb = NumBundleThreads / DivideNum;
970 UInt32 nb = NumBundleThreads;
971
972 DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1;
973 // v += low;
974 mask <<= v;
975
976 // printf(" %2d %8x \n ", v, (unsigned)mask);
977 #ifdef _WIN32
978 *cpuSet = mask;
979 #else
980 {
981 for (unsigned k = 0; k < nb; k++)
982 CpuSet_Set(cpuSet, v + k);
983 }
984 #endif
985
986 return mask;
987 }
988
989
990 struct CBenchSyncCommon
991 {
992 bool ExitMode;
993 NSynchronization::CManualResetEvent StartEvent;
994
995 CBenchSyncCommon(): ExitMode(false) {}
996 };
997
998 #endif
999
1000
1001
// Selects when CRC of decoded data is checked (see CEncoderInfo::CheckCrcMode_Dec).
// NOTE(review): the code that interprets these values is outside this part of the file;
// the meaning below is taken from the names - confirm in the decode code.
enum E_CheckCrcMode
{
  k_CheckCrcMode_Never = 0,     // presumably: CRC is never checked
  k_CheckCrcMode_Always = 1,    // presumably: CRC is checked in each pass
  k_CheckCrcMode_FirstPass = 2  // presumably: CRC is checked only in the first pass
};
1008
class CEncoderInfo;

/*
  State of one benchmark job: one encoder and up to two decoders, together with
  their buffers, progress objects and (in multi-threaded builds) threads.
    Generate() : prepares input data, buffers and coder properties
    Encode()   : encoding pass (it calls Generate())
    Decode()   : decoding pass for the decoder (decoderIndex)
  The constructor sets only a part of the fields: all other fields must be set
  by the owner before Encode() / Decode() calls.
*/
class CEncoderInfo Z7_final
{
  Z7_CLASS_NO_COPY(CEncoderInfo)

public:

  #ifndef Z7_ST
  NWindows::CThread thread[2];   // [0] is also used for the encoder thread; [index] for decoder threads
  NSynchronization::CManualResetEvent ReadyEvent; // it's set by the encoder thread
  UInt32 NumDecoderSubThreads;
  CBenchSyncCommon *Common;      // can be NULL: then Encode() starts time measurement itself
  UInt32 EncoderIndex;           // bundle index for affinity
  UInt32 NumEncoderInternalThreads;
  CAffinityMode AffinityMode;
  bool IsGlobalMtMode; // if more than one benchmark encoder threads
  #endif

  // only one of (_encoder) and (_encoderFilter) is expected to be set:
  // the code selects _encoderFilter, if it is not NULL
  CMyComPtr<ICompressCoder> _encoder;
  CMyComPtr<ICompressFilter> _encoderFilter;
  CBenchProgressInfo *progressInfoSpec[2];
  CMyComPtr<ICompressProgressInfo> progressInfo[2];
  UInt64 NumIterations;

  UInt32 Salt;  // salt for generated random data

  #ifdef USE_ALLOCA
  size_t AllocaSize;  // stack shift for the encoder thread
  #endif

  unsigned KeySize;   // number of bytes of (_key) passed to SetKey()
  Byte _key[32];
  Byte _iv[16];

  // sets the key (KeySize bytes) and the full init vector for a crypto coder
  HRESULT Set_Key_and_IV(ICryptoProperties *cp)
  {
    RINOK(cp->SetKey(_key, KeySize))
    return cp->SetInitVector(_iv, sizeof(_iv));
  }

  Byte _psw[16];  // password for coders that support ICryptoSetPassword

  bool CheckCrc_Enc;    /* = 1, if we want to check packed data crcs after each pass
                           used for filter and usual coders */
  bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass
                           used only for filter */
  E_CheckCrcMode CheckCrcMode_Dec;

  // parameter block for a decoder thread
  struct CDecoderInfo
  {
    CEncoderInfo *Encoder;
    UInt32 DecoderIndex;
    bool CallbackMode;

    #ifdef USE_ALLOCA
    size_t AllocaSize;  // stack shift for the decoder thread
    #endif
  };
  CDecoderInfo decodersInfo[2];

  CMyComPtr<ICompressCoder> _decoders[2];
  CMyComPtr<ICompressFilter> _decoderFilter;

  HRESULT Results[2];   // [0] is also used for the result of the encoder thread
  CBenchmarkOutStream *outStreamSpec;
  CMyComPtr<ISequentialOutStream> outStream;
  IBenchCallback *callback;
  IBenchPrintCallback *printCallback;
  UInt32 crc;           // CRC of generated data (it's set by Generate() for generated data only)
  size_t kBufferSize;   // size of uncompressed data
  size_t compressedSize;
  const Byte *uncompressedDataPtr;  // it points to (fileData) or to (rg) data

  const Byte *fileData; // external input data; NULL means that data must be generated
  CBenchRandomGenerator rg;

  CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!

  // CBenchmarkOutStream *propStreamSpec;
  Byte propsData[kMaxMethodPropSize]; // receives coder properties written via (propStream)
  CBufPtrSeqOutStream *propStreamSpec;
  CMyComPtr<ISequentialOutStream> propStream;

  unsigned generateDictBits;  // 0: simple random data; otherwise: dictBits for GenerateLz()
  COneMethodInfo _method;

  // for decode
  size_t _uncompressedDataSize;

  HRESULT Generate();
  HRESULT Encode();
  HRESULT Decode(UInt32 decoderIndex);

  CEncoderInfo():
    #ifndef Z7_ST
    Common(NULL),
    IsGlobalMtMode(true),
    #endif
    Salt(0),
    KeySize(0),
    CheckCrc_Enc(true),
    UseRealData_Enc(true),
    CheckCrcMode_Dec(k_CheckCrcMode_Always),
    outStreamSpec(NULL),
    callback(NULL),
    printCallback(NULL),
    fileData(NULL),
    propStreamSpec(NULL)
    {}

  #ifndef Z7_ST

  /*
    Thread function of the encoder thread. (param) is CEncoderInfo *.
    It runs Encode(), converts any C++ exception to E_FAIL,
    stores the result to Results[0] (and to the shared status, if it's an error),
    and sets ReadyEvent.
  */
  static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
  {
    HRESULT res;
    CEncoderInfo *encoder = (CEncoderInfo *)param;
    try
    {
      #ifdef USE_ALLOCA
      // NOTE(review): presumably it shifts the stack, so that stacks of
      // different threads get different offsets - confirm
      alloca(encoder->AllocaSize);
      #endif

      res = encoder->Encode();
    }
    catch(...)
    {
      res = E_FAIL;
    }
    encoder->Results[0] = res;
    if (res != S_OK)
      encoder->progressInfoSpec[0]->Status->SetResult(res);
    encoder->ReadyEvent.Set();
    return THREAD_FUNC_RET_ZERO;
  }

  /*
    Thread function of a decoder thread. (param) is CDecoderInfo *.
    It runs Decode() and stores the result to Results[DecoderIndex].
    Unlike EncodeThreadFunction(), exceptions are not caught here.
  */
  static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
  {
    CDecoderInfo *decoder = (CDecoderInfo *)param;

    #ifdef USE_ALLOCA
    alloca(decoder->AllocaSize);
    // printf("\nalloca=%d\n", (unsigned)decoder->AllocaSize);
    #endif

    CEncoderInfo *encoder = decoder->Encoder;
    encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
    return THREAD_FUNC_RET_ZERO;
  }

  // creates ReadyEvent (if it was not created) and starts the encoder thread (thread[0])
  HRESULT CreateEncoderThread()
  {
    WRes res = 0;
    if (!ReadyEvent.IsCreated())
      res = ReadyEvent.Create();
    if (res == 0)
      res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this,
          EncoderIndex);
    return HRESULT_FROM_WIN32(res);
  }

  // fills decodersInfo[index] and starts the decoder thread (thread[index]).
  // The decoder thread uses the affinity bundle of its encoder (EncoderIndex)
  HRESULT CreateDecoderThread(unsigned index, bool callbackMode
      #ifdef USE_ALLOCA
      , size_t allocaSize
      #endif
      )
  {
    CDecoderInfo &decoder = decodersInfo[index];
    decoder.DecoderIndex = index;
    decoder.Encoder = this;

    #ifdef USE_ALLOCA
    decoder.AllocaSize = allocaSize;
    #endif

    decoder.CallbackMode = callbackMode;

    WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder,
        // EncoderIndex * NumEncoderInternalThreads + index
        EncoderIndex
        );

    return HRESULT_FROM_WIN32(res);
  }

  #endif
};
1196
1197
1198
1199
1200 static size_t GetBenchCompressedSize(size_t bufferSize)
1201 {
1202 return kCompressedAdditionalSize + bufferSize + bufferSize / 16;
1203 // kBufferSize / 2;
1204 }
1205
1206
/*
  Prepares everything that is required before the encoding pass:
    1) input data: (fileData), or its private copy in global MT mode, or generated
       data (simple random or LZ-like). CRC is calculated here only for generated data.
    2) output buffer (outStreamSpec); for a filter also copies of input data
       in the output buffer and in (rgCopy)
    3) stream for coder properties (propStream over propsData)
    4) coder properties, optional written properties, optional password
  returns:
    S_OK
    E_OUTOFMEMORY : buffer allocation failed
    E_FAIL        : size of compressed buffer overflows size_t
    E_INVALIDARG  : unsupported generateDictBits / kBufferSize, or the method has
                    required properties, but the coder can't accept properties
    or the error code returned by the coder
*/
HRESULT CEncoderInfo::Generate()
{
  const COneMethodInfo &method = _method;

  // we need extra space, if input data is already compressed
  const size_t kCompressedBufferSize = _encoderFilter ?
      kBufferSize :
      GetBenchCompressedSize(kBufferSize);

  // overflow check for GetBenchCompressedSize()
  if (kCompressedBufferSize < kBufferSize)
    return E_FAIL;

  uncompressedDataPtr = fileData;
  if (fileData)
  {
    #if !defined(Z7_ST)
    if (IsGlobalMtMode)
    {
      /* we copy the data to local buffer of thread to eliminate
         using of shared buffer by different threads */
      ALLOC_WITH_HRESULT(&rg, kBufferSize)
      memcpy((Byte *)rg, fileData, kBufferSize);
      uncompressedDataPtr = (const Byte *)rg;
    }
    #endif
  }
  else
  {
    ALLOC_WITH_HRESULT(&rg, kBufferSize)
    // DWORD ttt = GetTickCount();
    if (generateDictBits == 0)
      rg.GenerateSimpleRandom(Salt);
    else
    {
      // GenerateLz() shifts size_t values by up to (generateDictBits) bits
      if (generateDictBits >= sizeof(size_t) * 8
          && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
        return E_INVALIDARG;
      rg.GenerateLz(generateDictBits, Salt);
      // return E_ABORT; // for debug
    }
    // printf("\n%d\n ", GetTickCount() - ttt);

    crc = CrcCalc((const Byte *)rg, rg.Size());
    uncompressedDataPtr = (const Byte *)rg;
  }

  // the output stream object is created once and reused by next calls
  if (!outStream)
  {
    outStreamSpec = new CBenchmarkOutStream;
    outStream = outStreamSpec;
  }

  ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)

  if (_encoderFilter)
  {
    /* we try to reduce the number of memcpy() in main encoding loop.
       so we copy data to temp buffers here */
    ALLOC_WITH_HRESULT(&rgCopy, kBufferSize)
    memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize);
    memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
  }

  if (!propStream)
  {
    propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
    propStream = propStreamSpec;
  }
  // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
  // propStreamSpec->Init(true, false);
  propStreamSpec->Init(propsData, sizeof(propsData));


  // the filter has priority over the coder, if both are set
  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  {
    CMyComPtr<ICompressSetCoderProperties> scp;
    coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
    if (scp)
    {
      const UInt64 reduceSize = kBufferSize;

      /* in posix new thread uses same affinity as parent thread,
         so we don't need to send affinity to coder in posix */
      UInt64 affMask;
      #if !defined(Z7_ST) && defined(_WIN32)
      {
        CCpuSet cpuSet;
        affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
      }
      #else
      affMask = 0;
      #endif
      // affMask <<= 3; // debug line: to test no affinity in coder;
      // affMask = 0;

      // zero mask means "no affinity": then NULL is passed instead of the mask
      RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
    }
    else
    {
      if (method.AreThereNonOptionalProps())
        return E_INVALIDARG;
    }

    CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
    coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
    if (writeCoderProps)
    {
      RINOK(writeCoderProps->WriteCoderProperties(propStream))
    }

    {
      CMyComPtr<ICryptoSetPassword> sp;
      coder.QueryInterface(IID_ICryptoSetPassword, &sp);
      if (sp)
      {
        RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))

        // we must call encoding one time to calculate password key for key cache.
        // it must be after WriteCoderProperties!
        Byte temp[16];
        memset(temp, 0, sizeof(temp));

        if (_encoderFilter)
        {
          // results of Init() and Filter() are ignored in this warm-up call
          _encoderFilter->Init();
          _encoderFilter->Filter(temp, sizeof(temp));
        }
        else
        {
          // warm-up encoding of 16 zero bytes; the output is only counted, and it's not stored
          CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
          CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
          inStreamSpec->Init(temp, sizeof(temp));

          CCrcOutStream *crcStreamSpec = new CCrcOutStream;
          CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
          crcStreamSpec->Init();

          RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL))
        }
      }
    }
  }

  return S_OK;
}
1356
1357
1358 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc)
1359 {
1360 while (size != 0)
1361 {
1362 UInt32 cur = crc ? 1 << 17 : 1 << 24;
1363 if (cur > size)
1364 cur = (UInt32)size;
1365 UInt32 processed = filter->Filter(data, cur);
1366 /* if (processed > size) (in AES filter), we must fill last block with zeros.
1367 but it is not important for benchmark. So we just copy that data without filtering.
1368 if (processed == 0) then filter can't process more */
1369 if (processed > size || processed == 0)
1370 processed = (UInt32)size;
1371 if (crc)
1372 *crc = CrcUpdate(*crc, data, processed);
1373 data += processed;
1374 size -= processed;
1375 }
1376 }
1377
1378
/*
CEncoderInfo::Encode() is the encoding part of the benchmark for one encoder item.
It does the following steps:
1) calls Generate().
2) if (Common) is set (Z7_ST is not defined): sets ReadyEvent and waits for
Common->StartEvent. It returns S_OK without encoding, if (Common->ExitMode) is set.
If (Common) is not set, it calls SetStartTime() for progressInfoSpec[0].
3) runs (NumIterations) passes: each pass encodes (or filters) (kBufferSize) bytes.
4) adds the sizes of each pass to progressInfoSpec[0]->BenchInfo.
5) releases (_encoder) and (_encoderFilter), if all passes were finished without error.

It returns E_FAIL, if
- the coder didn't read all input data,
- the compressed size differs from the compressed size of previous pass,
- the CRC of output data differs from the CRC that was calculated in previous pass.
*/
HRESULT CEncoderInfo::Encode()
{
// printf("\nCEncoderInfo::Generate\n");

RINOK(Generate())

// printf("\n2222\n");

#ifndef Z7_ST
if (Common)
{
// we report that this thread is ready, and then we wait for the start signal
Results[0] = S_OK;
WRes wres = ReadyEvent.Set();
if (wres == 0)
wres = Common->StartEvent.Lock();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
if (Common->ExitMode)
return S_OK;
}
else
#endif
{
CBenchProgressInfo *bpi = progressInfoSpec[0];
bpi->SetStartTime();
}


CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
bi.UnpackSize = 0;
bi.PackSize = 0;
CMyComPtr<ICryptoProperties> cp;
// (coder) is the object that is used for interface queries: the filter or the coder
CMyComPtr<IUnknown> coder;
if (_encoderFilter)
coder = _encoderFilter;
else
coder = _encoder;
coder.QueryInterface(IID_ICryptoProperties, &cp);
CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
CMyComPtr<ISequentialInStream> inStream = inStreamSpec;

if (cp)
{
RINOK(Set_Key_and_IV(cp))
}

// for filter: output size is equal to input size.
// for coder: (compressedSize) will be set after first pass.
compressedSize = 0;
if (_encoderFilter)
compressedSize = kBufferSize;

// CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
UInt64 prev = 0;

// if (CheckCrc_Enc), then (mask == 0), and CRC is calculated in each pass.
// if (!CheckCrc_Enc), CRC is calculated only in passes where low 16 bits of (i) are zeros,
// and only if (NumIterations > 0xFFFF).
const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
const bool useCrc = (mask < NumIterations);
bool crcPrev_defined = false;
UInt32 crcPrev = 0;

bool useRealData_Enc = UseRealData_Enc;
bool data_Was_Changed = false;
if (useRealData_Enc)
{
/* we want memcpy() for each iteration including first iteration.
So results will be equal for different number of iterations */
data_Was_Changed = true;
}

const UInt64 numIterations = NumIterations;
// (i) is decreased at the start of each pass. So (i == numIterations - 1) in first pass
UInt64 i = numIterations;
// printCallback->NewLine();

while (i != 0)
{
i--;
// we call CheckBreak() after each (1 << 26) bytes of processed unpacked data
if (printCallback && bi.UnpackSize - prev >= (1 << 26))
{
prev = bi.UnpackSize;
RINOK(printCallback->CheckBreak())
}

/*
CBenchInfo info;
progressInfoSpec[0]->SetStartTime();
*/

bool calcCrc = false;
if (useCrc)
calcCrc = (((UInt32)i & mask) == 0);

if (_encoderFilter)
{
// filter works in place.
// default buffer is (rgCopy): the result in that buffer is not checked.
Byte *filterData = rgCopy;
if (i == numIterations - 1 || calcCrc || useRealData_Enc)
{
// first pass, pass with CRC, or real data mode: we use buffer of (outStreamSpec).
// printf("\nfilterData = (Byte *)*outStreamSpec;\n");
filterData = (Byte *)*outStreamSpec;
if (data_Was_Changed)
{
// the buffer was changed by previous pass. So we copy original data again
// printf("\nmemcpy(filterData, uncompressedDataPtr\n");
memcpy(filterData, uncompressedDataPtr, kBufferSize);
}
data_Was_Changed = true;
}
_encoderFilter->Init();
if (calcCrc)
{
// printf("\nInitCrc\n");
outStreamSpec->InitCrc();
}
// printf("\nMy_FilterBench\n");
My_FilterBench(_encoderFilter, filterData, kBufferSize,
calcCrc ? &outStreamSpec->Crc : NULL);
}
else
{
outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
if (!inStreamSpec->WasFinished())
return E_FAIL;
// the compressed size must be the same in all passes
if (compressedSize != outStreamSpec->Pos)
{
if (compressedSize != 0)
return E_FAIL;
compressedSize = outStreamSpec->Pos;
}
}

// outStreamSpec->Print();

if (calcCrc)
{
// the CRC of output must be the same in all passes where CRC was calculated
const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
if (crcPrev_defined && crcPrev != crc2)
return E_FAIL;
crcPrev = crc2;
crcPrev_defined = true;
}

bi.UnpackSize += kBufferSize;
bi.PackSize += compressedSize;

/*
{
progressInfoSpec[0]->SetFinishTime(info);
info.UnpackSize = 0;
info.PackSize = 0;
info.NumIterations = 1;

info.UnpackSize = kBufferSize;
info.PackSize = compressedSize;
// printf("\n%7d\n", encoder.compressedSize);

RINOK(callback->SetEncodeResult(info, true))
printCallback->NewLine();
}
*/

}

_encoder.Release();
_encoderFilter.Release();
return S_OK;
}
1543
1544
/*
CEncoderInfo::Decode() is the decoding part of the benchmark for one decoder.
decoderIndex : index of decoder in (_decoders), and index of item in (progressInfoSpec).
In filter mode (_decoderFilter is set) only (decoderIndex == 0) is allowed.

It sets the properties, the password and the key for the decoder,
and then it runs (NumIterations) passes. Each pass decodes (compressedSize) bytes
from the buffer of (outStreamSpec). The CRC of decoded data is compared with (crc),
if the CRC check is enabled for that pass by (CheckCrcMode_Dec).
The sizes of each pass are added to progressInfoSpec[decoderIndex]->BenchInfo.
It releases the decoder and (_decoderFilter), if all passes were finished without error.

It returns S_FALSE, if the decoded data or the processed sizes are not as expected.
*/
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
// (coder) is the object that is used for interface queries: the filter or the decoder
CMyComPtr<IUnknown> coder;
if (_decoderFilter)
{
if (decoderIndex != 0)
return E_FAIL;
coder = _decoderFilter;
}
else
coder = decoder;

// printf("\ndecoderIndex = %d, stack = %p", decoderIndex, &coder);

CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
// it's error, if there are properties in (propStreamSpec), and decoder can't get them
if (!setDecProps && propStreamSpec->GetPos() != 0)
return E_FAIL;

CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;

CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
pi->BenchInfo.UnpackSize = 0;
pi->BenchInfo.PackSize = 0;

#ifndef Z7_ST
{
CMyComPtr<ICompressSetCoderMt> setCoderMt;
coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
if (setCoderMt)
{
RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
}
}
#endif

CMyComPtr<ICompressSetCoderProperties> scp;
coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
if (scp)
{
const UInt64 reduceSize = _uncompressedDataSize;
RINOK(_method.SetCoderProps(scp, &reduceSize))
}

CMyComPtr<ICryptoProperties> cp;
coder.QueryInterface(IID_ICryptoProperties, &cp);

if (setDecProps)
{
// (propsData) is the buffer that was attached to (propStreamSpec) with Init().
// GetPos() is the number of bytes in that buffer.
RINOK(setDecProps->SetDecoderProperties2(
/* (const Byte *)*propStreamSpec, */
propsData,
(UInt32)propStreamSpec->GetPos()))
}

{
CMyComPtr<ICryptoSetPassword> sp;
coder.QueryInterface(IID_ICryptoSetPassword, &sp);
if (sp)
{
RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
}
}

UInt64 prev = 0;

if (cp)
{
RINOK(Set_Key_and_IV(cp))
}

CMyComPtr<ICompressSetFinishMode> setFinishMode;

if (_decoderFilter)
{
// (rgCopy) can be used as work buffer for (compressedSize) bytes in the loop below
if (compressedSize > rgCopy.Size())
return E_FAIL;
}
else
{
decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
}

const UInt64 numIterations = NumIterations;
const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;

for (UInt64 i = 0; i < numIterations; i++)
{
// we call CheckBreak() after each (1 << 26) bytes of processed unpacked data
if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
{
RINOK(printCallback->CheckBreak())
prev = pi->BenchInfo.UnpackSize;
}

const UInt64 outSize = kBufferSize;
bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);

crcOutStreamSpec->Init();

if (_decoderFilter)
{
// filter works in place.
// default: we filter the buffer of (outStreamSpec), and we check CRC only in first pass.
// k_CheckCrcMode_Always: we filter the copy of data in (rgCopy),
// and we check CRC in each pass.
Byte *filterData = (Byte *)*outStreamSpec;
if (calcCrc)
{
calcCrc = (i == 0);
if (checkCrcMode == k_CheckCrcMode_Always)
{
calcCrc = true;
memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
filterData = rgCopy;
}
}
_decoderFilter->Init();
My_FilterBench(_decoderFilter, filterData, compressedSize,
calcCrc ? &crcOutStreamSpec->Crc : NULL);
}
else
{
crcOutStreamSpec->CalcCrc = calcCrc;
inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);

if (setFinishMode)
{
RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
}

RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))

if (setFinishMode)
{
// in finish mode: decoder must read all input data
if (!inStreamSpec->WasFinished())
return S_FALSE;

CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);

if (getInStreamProcessedSize)
{
UInt64 processed;
RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
if (processed != compressedSize)
return S_FALSE;
}
}

// decoder must write exactly (outSize) bytes
if (crcOutStreamSpec->Pos != outSize)
return S_FALSE;
}

// (crc) is the expected CRC of unpacked data
if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
return S_FALSE;

pi->BenchInfo.UnpackSize += kBufferSize;
pi->BenchInfo.PackSize += compressedSize;
}

decoder.Release();
_decoderFilter.Release();
return S_OK;
}
1709
1710
// kNumThreadsMax == 4096.
// NOTE(review): presumably it's the upper limit for the number of benchmark threads;
// it is not used in this part of the file - confirm at the places where it is used.
static const UInt32 kNumThreadsMax = (1 << 12);
1712
1713 struct CBenchEncoders
1714 {
1715 CEncoderInfo *encoders;
1716 CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
1717 ~CBenchEncoders() { delete []encoders; }
1718 };
1719
1720
1721 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1722 {
1723 if (numCommands < (1 << 4))
1724 numCommands = (1 << 4);
1725 UInt64 res = complexInCommands / numCommands;
1726 return (res == 0 ? 1 : res);
1727 }
1728
1729
1730
1731 #ifndef Z7_ST
1732
1733 // ---------- CBenchThreadsFlusher ----------
1734
1735 struct CBenchThreadsFlusher
1736 {
1737 CBenchEncoders *EncodersSpec;
1738 CBenchSyncCommon Common;
1739 unsigned NumThreads;
1740 bool NeedClose;
1741
1742 CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1743
1744 ~CBenchThreadsFlusher()
1745 {
1746 StartAndWait(true);
1747 }
1748
1749 WRes StartAndWait(bool exitMode = false);
1750 };
1751
1752
1753 WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1754 {
1755 if (!NeedClose)
1756 return 0;
1757
1758 Common.ExitMode = exitMode;
1759 WRes res = Common.StartEvent.Set();
1760
1761 for (unsigned i = 0; i < NumThreads; i++)
1762 {
1763 NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1764 if (t.IsCreated())
1765 {
1766 WRes res2 = t.Wait_Close();
1767 if (res == 0)
1768 res = res2;
1769 }
1770 }
1771 NeedClose = false;
1772 return res;
1773 }
1774
1775 #endif // Z7_ST
1776
1777
1778
1779 static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue)
1780 {
1781 for (size_t i = 0; i < size; i++)
1782 {
1783 data[i] = (Byte)startValue;
1784 startValue++;
1785 }
1786 }
1787
1788
1789
/*
MethodBench() runs the encoding benchmark and then the decoding benchmark for one method.

complexInCommands  : the target complexity; the number of passes is calculated from it
with GetNumIterations().
oldLzmaBenchMode,
numThreads,
affinityMode       : exist only if Z7_ST is not defined.
method2            : the method; the local copy of it is used, because the copy can be changed.
uncompressedDataSize : the size of input data for each encoder.
fileData           : if it is not NULL, it's real data that is used as input.
generateDictBits   : the value is passed to each encoder.
printCallback      : can be NULL; it's used for CheckBreak() calls.
callback           : the receiver of SetEncodeResult() and SetDecodeResult(); it's called without NULL check.
benchProps         : the source of complexity estimations and of (KeySize).

Steps:
1) find the codec, select the number of encoders and the number of decoders per encoder.
2) create coders and set the fields of each CEncoderInfo.
3) encode: in threads (if multithreading mode or affinity is required), or in current thread.
4) report the encode result.
5) decode: in threads or in current thread.
6) report the decode result.
*/
static HRESULT MethodBench(
DECL_EXTERNAL_CODECS_LOC_VARS
UInt64 complexInCommands,
#ifndef Z7_ST
bool oldLzmaBenchMode,
UInt32 numThreads,
const CAffinityMode *affinityMode,
#endif
const COneMethodInfo &method2,
size_t uncompressedDataSize,
const Byte *fileData,
unsigned generateDictBits,

IBenchPrintCallback *printCallback,
IBenchCallback *callback,
CBenchProps *benchProps)
{
COneMethodInfo method = method2;
UInt64 methodId;
UInt32 numStreams;
bool isFilter;
const int codecIndex = FindMethod_Index(
EXTERNAL_CODECS_LOC_VARS
method.MethodName, true,
methodId, numStreams, isFilter);
if (codecIndex < 0)
return E_NOTIMPL;
// only coders with one stream are supported here
if (numStreams != 1)
return E_INVALIDARG;

UInt32 numEncoderThreads = 1;
UInt32 numSubDecoderThreads = 1;

#ifndef Z7_ST
numEncoderThreads = numThreads;

// old LZMA benchmark mode: if LZMA encoder uses more than one thread,
// we create half as many encoders, and we use two decoders per encoder.
if (oldLzmaBenchMode)
if (methodId == k_LZMA)
{
if (numThreads == 1 && method.Get_NumThreads() < 0)
method.AddProp_NumThreads(1);
const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
if (numThreads > 1 && numLzmaThreads > 1)
{
numEncoderThreads = (numThreads + 1) / 2; // 20.03
numSubDecoderThreads = 2;
}
}

// separate encoder threads are used also for single encoder, if affinity is required
const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();

#endif

CBenchEncoders encodersSpec(numEncoderThreads);
CEncoderInfo *encoders = encodersSpec.encoders;

UInt32 i;

// ---------- create coders and set check modes ----------

for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
// only first encoder gets (callback)
encoder.callback = (i == 0) ? callback : NULL;
encoder.printCallback = printCallback;

#ifndef Z7_ST
encoder.EncoderIndex = i;
encoder.NumEncoderInternalThreads = numSubDecoderThreads;
encoder.AffinityMode = *affinityMode;

/*
if (numSubDecoderThreads > 1)
if (encoder.AffinityMode.NeedAffinity()
&& encoder.AffinityMode.NumBundleThreads == 1)
{
// if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
encoder.AffinityMode.NumBundleThreads *= 2;
}
*/

#endif

{
CCreatedCoder cod;
RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
encoder._encoder = cod.Coder;
if (!encoder._encoder && !encoder._encoderFilter)
return E_NOTIMPL;
}

// fixed values are used for iv, key and password. So the results are reproducible
SetPseudoRand(encoder._iv, sizeof(encoder._iv), 17);
SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);

for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
CCreatedCoder cod;
CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
decoder = cod.Coder;
if (!encoder._decoderFilter && !decoder)
return E_NOTIMPL;
}

// real data and CRC check in each encoding pass are used,
// if the encoding complexity estimation is larger than 30
encoder.UseRealData_Enc =
encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;

// CRC is checked in each decoding pass by default.
// If the decoding complexity estimation is small, CRC is checked only in first pass.
encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
if (benchProps->DecComplexCompr +
benchProps->DecComplexUnc <= 30)
encoder.CheckCrcMode_Dec =
k_CheckCrcMode_FirstPass; // for filters
// k_CheckCrcMode_Never; // for debug
// k_CheckCrcMode_Always; // for debug
// if there is data from file, we use real data and full CRC check in decoder
if (fileData)
{
encoder.UseRealData_Enc = true;
encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
}
}

// (crc) is calculated here only for data from file
UInt32 crc = 0;
if (fileData)
crc = CrcCalc(fileData, uncompressedDataSize);

for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
encoder._method = method;
encoder.generateDictBits = generateDictBits;
encoder._uncompressedDataSize = uncompressedDataSize;
encoder.kBufferSize = uncompressedDataSize;
encoder.fileData = fileData;
encoder.crc = crc;
}

// (status) is shared by all CBenchProgressInfo objects that are created below
CBenchProgressStatus status;
status.Res = S_OK;
status.EncodeMode = true;

#ifndef Z7_ST
// (encoderFlusher) is declared after (encodersSpec).
// So the destructor of (encoderFlusher) is called before (encoders) are deleted.
CBenchThreadsFlusher encoderFlusher;
if (mtEncMode)
{
WRes wres = encoderFlusher.Common.StartEvent.Create();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
encoderFlusher.NumThreads = numEncoderThreads;
encoderFlusher.EncodersSpec = &encodersSpec;
encoderFlusher.NeedClose = true;
}
#endif

// ---------- set encoding parameters and create encoder threads ----------

for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
// encoder.NumIterations = 3;
{
// (Salt) is a value that depends on index of encoder
#if 0
#define kCrcPoly 0xEDB88320
UInt32 r = i;
unsigned num = numEncoderThreads < 256 ? 8 : 16;
do
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
while (--num);
encoder.Salt = r;
#else
UInt32 salt0 = g_CrcTable[(Byte)i];
UInt32 salt1 = g_CrcTable[(Byte)(i >> 8)];
encoder.Salt = salt0 ^ (salt1 << 3);
#endif
}

// (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
// printf("\n encoder index = %d, Salt = %8x\n", i, encoder.Salt);

encoder.KeySize = benchProps->KeySize;

for (int j = 0; j < 2; j++)
{
CBenchProgressInfo *spec = new CBenchProgressInfo;
encoder.progressInfoSpec[j] = spec;
encoder.progressInfo[j] = spec;
spec->Status = &status;
}

// only the progress object of first encoder reports to (callback)
if (i == 0)
{
CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
bpi->Callback = callback;
bpi->BenchInfo.NumIterations = numEncoderThreads;
}

#ifndef Z7_ST
if (mtEncMode)
{
#ifdef USE_ALLOCA
encoder.AllocaSize = BENCH_ALLOCA_VALUE(i);
#endif

encoder.Common = &encoderFlusher.Common;
encoder.IsGlobalMtMode = numEncoderThreads > 1;
RINOK(encoder.CreateEncoderThread())
}
#endif
}

if (printCallback)
{
RINOK(printCallback->CheckBreak())
}

// ---------- Encode ----------

#ifndef Z7_ST
if (mtEncMode)
{
// we wait until each encoder thread sets its ReadyEvent
// (CEncoderInfo::Encode() sets it after Generate() call)
for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];
const WRes wres = encoder.ReadyEvent.Lock();
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
RINOK(encoder.Results[0])
}

// the start time is set after all threads are ready
CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
bpi->SetStartTime();

// we set start event, and we wait for all encoder threads
const WRes wres = encoderFlusher.StartAndWait();
if (status.Res == 0 && wres != 0)
return HRESULT_FROM_WIN32(wres);
}
else
#endif
{
RINOK(encoders[0].Encode())
}

RINOK(status.Res)

CBenchInfo info;

encoders[0].progressInfoSpec[0]->SetFinishTime(info);
// the sizes in (info) are sums of sizes of one pass for all encoders
info.UnpackSize = 0;
info.PackSize = 0;
info.NumIterations = encoders[0].NumIterations;

for (i = 0; i < numEncoderThreads; i++)
{
const CEncoderInfo &encoder = encoders[i];
info.UnpackSize += encoder.kBufferSize;
info.PackSize += encoder.compressedSize;
// printf("\n%7d\n", encoder.compressedSize);
}

RINOK(callback->SetEncodeResult(info, true))




// ---------- Decode ----------

status.Res = S_OK;
status.EncodeMode = false;

const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
#ifndef Z7_ST
const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
#endif

for (i = 0; i < numEncoderThreads; i++)
{
CEncoderInfo &encoder = encoders[i];

/*
#ifndef Z7_ST
// encoder.affinityMode = *affinityMode;
if (encoder.NumEncoderInternalThreads != 1)
encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
#endif
*/


// the number of decoding passes is calculated for first encoder,
// and the same number is used for other encoders
if (i == 0)
{
encoder.NumIterations = GetNumIterations(
benchProps->GetNumCommands_Dec(
encoder.compressedSize,
encoder.kBufferSize),
complexInCommands);
CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
bpi->Callback = callback;
bpi->BenchInfo.NumIterations = numDecoderThreads;
bpi->SetStartTime();
}
else
encoder.NumIterations = encoders[0].NumIterations;

#ifndef Z7_ST
{
const int numSubThreads = method.Get_NumThreads();
encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
}
if (mtDecoderMode)
{
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
#ifdef USE_ALLOCA
, BENCH_ALLOCA_VALUE(i * numSubDecoderThreads + j)
#endif
);
RINOK(res)
}
}
else
#endif
{
RINOK(encoder.Decode(0))
}
}

#ifndef Z7_ST
if (mtDecoderMode)
{
// we wait for all decoder threads, and we keep first error code of each type
WRes wres = 0;
HRESULT res = S_OK;
for (i = 0; i < numEncoderThreads; i++)
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
CEncoderInfo &encoder = encoders[i];
const WRes wres2 = encoder.thread[j].
// Wait(); // later we can get thread times from thread in UNDER_CE
Wait_Close();
if (wres == 0 && wres2 != 0)
wres = wres2;
const HRESULT res2 = encoder.Results[j];
if (res == 0 && res2 != 0)
res = res2;
}
if (wres != 0)
return HRESULT_FROM_WIN32(wres);
RINOK(res)
}
#endif // Z7_ST

RINOK(status.Res)
encoders[0].progressInfoSpec[0]->SetFinishTime(info);

/*
#ifndef Z7_ST
#ifdef UNDER_CE
if (mtDecoderMode)
for (i = 0; i < numEncoderThreads; i++)
for (UInt32 j = 0; j < numSubDecoderThreads; j++)
{
FILETIME creationTime, exitTime, kernelTime, userTime;
if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
}
#endif
#endif
*/

// the sizes in (info) are sums of sizes of one pass for all encoders.
// the number of decoders per encoder is included to (info.NumIterations).
info.UnpackSize = 0;
info.PackSize = 0;
info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

for (i = 0; i < numEncoderThreads; i++)
{
const CEncoderInfo &encoder = encoders[i];
info.UnpackSize += encoder.kBufferSize;
info.PackSize += encoder.compressedSize;
}

// RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
RINOK(callback->SetDecodeResult(info, true))

return S_OK;
}
2170
2171
2172
2173 static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
2174 {
2175 /*
2176 if (dictSizeLog < 32)
2177 return (UInt32)1 << dictSizeLog;
2178 else
2179 return (UInt32)(Int32)-1;
2180 */
2181 return (UInt64)1 << dictSizeLog;
2182 }
2183
2184
// kLzmaMaxDictSize is the largest dictionary size that is used in memory usage estimation
// for LZMA: (15 << 28) bytes = 3840 MiB.
// It's the limit of the current LZMA implementation, and it can be changed later.
#define kLzmaMaxDictSize ((UInt32)15 << 28)
2187
2188 static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
2189 {
2190 if (dict == 0)
2191 dict = 1;
2192 if (dict > kLzmaMaxDictSize)
2193 dict = kLzmaMaxDictSize;
2194 UInt32 hs = (UInt32)dict - 1;
2195 hs |= (hs >> 1);
2196 hs |= (hs >> 2);
2197 hs |= (hs >> 4);
2198 hs |= (hs >> 8);
2199 hs >>= 1;
2200 hs |= 0xFFFF;
2201 if (hs > (1 << 24))
2202 hs >>= 1;
2203 hs++;
2204 hs += (1 << 16);
2205
2206 const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
2207 UInt64 blockSize = (UInt64)dict + (1 << 16)
2208 + (multiThread ? (1 << 20) : 0);
2209 blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
2210 if (blockSize >= kBlockSizeMax)
2211 blockSize = kBlockSizeMax;
2212
2213 UInt64 son = (UInt64)dict;
2214 if (btMode)
2215 son *= 2;
2216 const UInt64 v = (hs + son) * 4 + blockSize +
2217 (1 << 20) + (multiThread ? (6 << 20) : 0);
2218
2219 // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
2220 // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
2221 return v;
2222 }
2223
2224
2225 UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
2226 {
2227 const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
2228 const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
2229 if (level < 0)
2230 level = 5;
2231 const int algo = (level < 5 ? 0 : 1);
2232 const int btMode = (algo == 0 ? 0 : 1);
2233
2234 UInt32 numBigThreads = numThreads;
2235 const bool lzmaMt = (totalBench || (numThreads > 1 && btMode));
2236 if (btMode)
2237 {
2238 if (!totalBench && lzmaMt)
2239 numBigThreads /= 2;
2240 }
2241 return ((UInt64)kBufferSize + kCompressedBufferSize +
2242 GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads;
2243 }
2244
2245 static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary)
2246 {
2247 // dictionary += (dictionary >> 9); // for page tables (virtual memory)
2248 return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20);
2249 }
2250
2251
2252 // ---------- CRC and HASH ----------
2253
2254 struct CCrcInfo_Base
2255 {
2256 CMidAlignedBuffer Buffer;
2257 const Byte *Data;
2258 size_t Size;
2259 bool CreateLocalBuf;
2260 UInt32 CheckSum_Res;
2261
2262 CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {}
2263
2264 HRESULT Generate(const Byte *data, size_t size);
2265 HRESULT CrcProcess(UInt64 numIterations,
2266 const UInt32 *checkSum, IHasher *hf,
2267 IBenchPrintCallback *callback);
2268 };
2269
2270
2271 // for debug: define it to test hash calling with unaligned data
2272 // #define Z7_BENCH_HASH_ALIGN_BUF_OFFSET 3
2273
2274 HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size)
2275 {
2276 Size = size;
2277 Data = data;
2278 if (!data || CreateLocalBuf)
2279 {
2280 Byte *buf;
2281 const size_t size2 = (size + k_RandBuf_AlignMask) & ~(size_t)k_RandBuf_AlignMask;
2282 if (size2 < size)
2283 return E_OUTOFMEMORY;
2284 #ifdef Z7_BENCH_HASH_ALIGN_BUF_OFFSET
2285 ALLOC_WITH_HRESULT(&Buffer, size2 + Z7_BENCH_HASH_ALIGN_BUF_OFFSET)
2286 buf = Buffer + Z7_BENCH_HASH_ALIGN_BUF_OFFSET;
2287 #else
2288 ALLOC_WITH_HRESULT(&Buffer, size2)
2289 buf = Buffer;
2290 #endif
2291 Data = buf;
2292 if (!data)
2293 RandGen_BufAfterPad(buf, size);
2294 else if (size != 0) // (CreateLocalBuf == true)
2295 memcpy(buf, data, size);
2296 }
2297 return S_OK;
2298 }
2299
2300
// HashUpdate(hf, data, size) passes (size) bytes of (data) to the hasher (hf).
// The default variant is the direct call of hf->Update().
#if 1
#define HashUpdate(hf, data, size) hf->Update(data, size)
#else
// for debug:
// this variant passes the same data to hf->Update() in several parts.
// The size of each part is calculated from the remaining size, and it's at least 1 byte.
static void HashUpdate(IHasher *hf, const void *data, UInt32 size)
{
for (;;)
{
if (size == 0)
return;
UInt32 size2 = (size * 0x85EBCA87) % size / 8;
// UInt32 size2 = size / 2;
if (size2 == 0)
size2 = 1;
hf->Update(data, size2);
data = (const void *)((const Byte *)data + size2);
size -= size2;
}
}
#endif
2321
2322
2323 HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations,
2324 const UInt32 *checkSum, IHasher *hf,
2325 IBenchPrintCallback *callback)
2326 {
2327 MY_ALIGN(16)
2328 UInt32 hash32[64 / 4];
2329 memset(hash32, 0, sizeof(hash32));
2330
2331 CheckSum_Res = 0;
2332
2333 const UInt32 hashSize = hf->GetDigestSize();
2334 if (hashSize > sizeof(hash32))
2335 return S_FALSE;
2336
2337 const Byte *buf = Data;
2338 const size_t size = Size;
2339 UInt32 checkSum_Prev = 0;
2340
2341 UInt64 prev = 0;
2342 UInt64 cur = 0;
2343
2344 do
2345 {
2346 hf->Init();
2347 size_t pos = 0;
2348 do
2349 {
2350 const size_t rem = size - pos;
2351 const UInt32 kStep = ((UInt32)1 << 31);
2352 const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep;
2353 HashUpdate(hf, buf + pos, curSize);
2354 pos += curSize;
2355 }
2356 while (pos != size);
2357
2358 hf->Final((Byte *)(void *)hash32);
2359 UInt32 sum = 0;
2360 for (UInt32 j = 0; j < hashSize; j += 4)
2361 {
2362 sum = rotlFixed(sum, 11);
2363 sum += GetUi32((const Byte *)(const void *)hash32 + j);
2364 }
2365 if (checkSum)
2366 {
2367 if (sum != *checkSum)
2368 return S_FALSE;
2369 }
2370 else
2371 {
2372 checkSum_Prev = sum;
2373 checkSum = &checkSum_Prev;
2374 }
2375 if (callback)
2376 {
2377 cur += size;
2378 if (cur - prev >= ((UInt32)1 << 30))
2379 {
2380 prev = cur;
2381 RINOK(callback->CheckBreak())
2382 }
2383 }
2384 }
2385 while (--numIterations);
2386
2387 CheckSum_Res = checkSum_Prev;
2388 return S_OK;
2389 }
2390
// g_BenchCpuFreqTemp is the start value and the operand for CountCpuFreq() calls.
extern
UInt32 g_BenchCpuFreqTemp; // we need a non-static variable to disable compiler optimization
UInt32 g_BenchCpuFreqTemp = 1;
2394
// YY1 is one pair of operations with (sum): addition of (val) and XOR with (val).
// Each next macro repeats the previous macro 4 times:
//   YY3 = 4 * YY1,  YY5 = 16 * YY1,  YY7 = 64 * YY1.
// So YY7 contains (64 * 2 = 128) operations, and that number is equal to kNumFreqCommands.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
static const UInt32 kNumFreqCommands = 128;
2400
2401 EXTERN_C_BEGIN
2402
// CountCpuFreq() executes the sequence of operations YY7 (num) times.
//   sum : the start value.
//   num : the number of repeats of YY7 sequence.
//   val : the operand for each operation in YY7.
// It returns the final value of (sum).
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
for (UInt32 i = 0; i < num; i++)
{
YY7
}
return sum;
}
2411
2412 EXTERN_C_END
2413
2414
2415 #ifndef Z7_ST
2416
2417 struct CBaseThreadInfo
2418 {
2419 NWindows::CThread Thread;
2420 IBenchPrintCallback *Callback;
2421 HRESULT CallbackRes;
2422
2423 WRes Wait_If_Created()
2424 {
2425 if (!Thread.IsCreated())
2426 return 0;
2427 return Thread.Wait_Close();
2428 }
2429 };
2430
// CFreqInfo is the item for one thread of CPU frequency test (FreqThreadFunction).
struct CFreqInfo: public CBaseThreadInfo
{
// the result: the final sum that was calculated by the thread
UInt32 ValRes;
// the value for (num) argument of CountCpuFreq()
UInt32 Size;
// the number of CountCpuFreq() calls in the thread
UInt64 NumIterations;
};
2437
2438 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
2439 {
2440 CFreqInfo *p = (CFreqInfo *)param;
2441
2442 UInt32 sum = g_BenchCpuFreqTemp;
2443 for (UInt64 k = p->NumIterations; k > 0; k--)
2444 {
2445 if (p->Callback)
2446 {
2447 p->CallbackRes = p->Callback->CheckBreak();
2448 if (p->CallbackRes != S_OK)
2449 break;
2450 }
2451 sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
2452 }
2453 p->ValRes = sum;
2454 return THREAD_FUNC_RET_ZERO;
2455 }
2456
2457 struct CFreqThreads
2458 {
2459 CFreqInfo *Items;
2460 UInt32 NumThreads;
2461
2462 CFreqThreads(): Items(NULL), NumThreads(0) {}
2463
2464 WRes WaitAll()
2465 {
2466 WRes wres = 0;
2467 for (UInt32 i = 0; i < NumThreads; i++)
2468 {
2469 WRes wres2 = Items[i].Wait_If_Created();
2470 if (wres == 0 && wres2 != 0)
2471 wres = wres2;
2472 }
2473 NumThreads = 0;
2474 return wres;
2475 }
2476
2477 ~CFreqThreads()
2478 {
2479 WaitAll();
2480 delete []Items;
2481 }
2482 };
2483
2484
2485 static THREAD_FUNC_DECL CrcThreadFunction(void *param);
2486
/*
CCrcInfo is the item for one thread of hash benchmark.
The fields that are read in Process():
Data, Size      : the source data for crcib.Generate(); (Data) can be NULL.
NumIterations   : the number of hashing passes.
CheckSumDefined : if it's true, (CheckSum) is the expected sum for each pass.
Hasher          : the hasher.
Common          : the start event and the exit mode flag that are shared by threads.
The fields that are written in Process():
Res             : the result code; it's E_FAIL after construction.
CheckSum_Res    : the copy of crcib.CheckSum_Res.
*/
struct CCrcInfo: public CBaseThreadInfo
{
const Byte *Data;
size_t Size;
UInt64 NumIterations;
bool CheckSumDefined;
UInt32 CheckSum;
CMyComPtr<IHasher> Hasher;
HRESULT Res;
UInt32 CheckSum_Res;

#ifndef Z7_ST
// the thread sets ReadyEvent in Process() after the data was prepared by Generate()
NSynchronization::CManualResetEvent ReadyEvent;
// the index that is passed to CreateThread_WithAffinity()
UInt32 ThreadIndex;
CBenchSyncCommon *Common;
CAffinityMode AffinityMode;
#endif

// we want to call CCrcInfo_Base::Buffer.Free() in main thread.
// so we uses non-local CCrcInfo_Base.
CCrcInfo_Base crcib;

// it creates ReadyEvent (if it was not created), and then it creates the thread
// that calls CrcThreadFunction() for this object.
HRESULT CreateThread()
{
WRes res = 0;
if (!ReadyEvent.IsCreated())
res = ReadyEvent.Create();
if (res == 0)
res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
ThreadIndex);
return HRESULT_FROM_WIN32(res);
}

#ifdef USE_ALLOCA
// the size for alloca() call in CrcThreadFunction()
size_t AllocaSize;
#endif

void Process();

CCrcInfo(): Res(E_FAIL) {}
};
2528
// the value is copied to (crcib.CreateLocalBuf) in CCrcInfo::Process().
// true  : each thread copies the data of file to its own local buffer.
// false : the threads read the same caller's buffer with the data of file.
static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
// static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test
2531
2532 void CCrcInfo::Process()
2533 {
2534 crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
2535 // we can use additional Generate() passes to reduce some time effects for new page allocation
2536 // for (unsigned y = 0; y < 10; y++)
2537 Res = crcib.Generate(Data, Size);
2538
2539 // if (Common)
2540 {
2541 WRes wres = ReadyEvent.Set();
2542 if (wres != 0)
2543 {
2544 if (Res == 0)
2545 Res = HRESULT_FROM_WIN32(wres);
2546 return;
2547 }
2548 if (Res != 0)
2549 return;
2550
2551 wres = Common->StartEvent.Lock();
2552
2553 if (wres != 0)
2554 {
2555 Res = HRESULT_FROM_WIN32(wres);
2556 return;
2557 }
2558 if (Common->ExitMode)
2559 return;
2560 }
2561
2562 Res = crcib.CrcProcess(NumIterations,
2563 CheckSumDefined ? &CheckSum : NULL, Hasher,
2564 Callback);
2565 CheckSum_Res = crcib.CheckSum_Res;
2566 /*
2567 We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
2568 to time of benchmark. So we don't free Buffer here
2569 */
2570 // crcib.Buffer.Free();
2571 }
2572
2573
2574 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
2575 {
2576 CCrcInfo *p = (CCrcInfo *)param;
2577
2578 #ifdef USE_ALLOCA
2579 alloca(p->AllocaSize);
2580 #endif
2581 p->Process();
2582 return THREAD_FUNC_RET_ZERO;
2583 }
2584
2585
2586 struct CCrcThreads
2587 {
2588 CCrcInfo *Items;
2589 unsigned NumThreads;
2590 CBenchSyncCommon Common;
2591 bool NeedClose;
2592
2593 CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {}
2594
2595 WRes StartAndWait(bool exitMode = false);
2596
2597 ~CCrcThreads()
2598 {
2599 StartAndWait(true);
2600 delete []Items;
2601 }
2602 };
2603
2604
2605 WRes CCrcThreads::StartAndWait(bool exitMode)
2606 {
2607 if (!NeedClose)
2608 return 0;
2609
2610 Common.ExitMode = exitMode;
2611 WRes wres = Common.StartEvent.Set();
2612
2613 for (unsigned i = 0; i < NumThreads; i++)
2614 {
2615 WRes wres2 = Items[i].Wait_If_Created();
2616 if (wres == 0 && wres2 != 0)
2617 wres = wres2;
2618 }
2619 NumThreads = 0;
2620 NeedClose = false;
2621 return wres;
2622 }
2623
2624 #endif
2625
2626
2627 /*
2628 static UInt32 CrcCalc1(const Byte *buf, size_t size)
2629 {
2630 UInt32 crc = CRC_INIT_VAL;
2631 for (size_t i = 0; i < size; i++)
2632 crc = CRC_UPDATE_BYTE(crc, buf[i]);
2633 return CRC_GET_DIGEST(crc);
2634 }
2635 */
2636
2637 /*
2638 static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
2639 {
2640 RandGen(buf, size, RG);
2641 return CrcCalc1(buf, size);
2642 }
2643 */
2644
2645 static bool CrcInternalTest()
2646 {
2647 CAlignedBuffer buffer;
2648 const size_t kBufSize = 1 << 11;
2649 const size_t kCheckSize = 1 << 6;
2650 buffer.Alloc(kBufSize);
2651 if (!buffer.IsAllocated())
2652 return false;
2653 Byte *buf = (Byte *)buffer;
2654 RandGen_BufAfterPad(buf, kBufSize);
2655 UInt32 sum = 0;
2656 for (size_t i = 0; i < kBufSize - kCheckSize * 2; i += kCheckSize - 1)
2657 for (size_t j = 0; j < kCheckSize; j++)
2658 {
2659 sum = rotlFixed(sum, 11);
2660 sum += CrcCalc(buf + i + j, j);
2661 }
2662 return sum == 0x28462c7c;
2663 }
2664
2665 struct CBenchMethod
2666 {
2667 unsigned Weight;
2668 unsigned DictBits;
2669 Int32 EncComplex;
2670 Int32 DecComplexCompr;
2671 Int32 DecComplexUnc;
2672 const char *Name;
2673 // unsigned KeySize;
2674 };
2675
2676 // #define USE_SW_CMPLX
2677
2678 #ifdef USE_SW_CMPLX
2679 #define CMPLX(x) ((x) * 1000)
2680 #else
2681 #define CMPLX(x) (x)
2682 #endif
2683
2684 static const CBenchMethod g_Bench[] =
2685 {
2686 // { 40, 17, 357, 145, 20, "LZMA:x1" },
2687 // { 20, 18, 360, 145, 20, "LZMA2:x1:mt2" },
2688
2689 { 20, 18, 360, 145, 20, "LZMA:x1" },
2690 { 20, 22, 600, 145, 20, "LZMA:x3" },
2691
2692 { 80, 24, 1220, 145, 20, "LZMA:x5:mt1" },
2693 { 80, 24, 1220, 145, 20, "LZMA:x5:mt2" },
2694
2695 { 10, 16, 124, 40, 14, "Deflate:x1" },
2696 { 20, 16, 376, 40, 14, "Deflate:x5" },
2697 { 10, 16, 1082, 40, 14, "Deflate:x7" },
2698 { 10, 17, 422, 40, 14, "Deflate64:x5" },
2699
2700 { 10, 15, 590, 69, 69, "BZip2:x1" },
2701 { 20, 19, 815, 122, 122, "BZip2:x5" },
2702 { 10, 19, 815, 122, 122, "BZip2:x5:mt2" },
2703 { 10, 19, 2530, 122, 122, "BZip2:x7" },
2704
2705 // { 10, 18, 1010, 0, 1150, "PPMDZip:x1" },
2706 { 10, 18, 1010, 0, 1150, "PPMD:x1" },
2707 // { 10, 22, 1655, 0, 1830, "PPMDZip:x5" },
2708 { 10, 22, 1655, 0, 1830, "PPMD:x5" },
2709
2710 // { 2, 0, -16, 0, -16, "Swap2" },
2711 { 2, 0, -16, 0, -16, "Swap4" },
2712
2713 // { 2, 0, 3, 0, 4, "Delta:1" },
2714 // { 2, 0, 3, 0, 4, "Delta:2" },
2715 // { 2, 0, 3, 0, 4, "Delta:3" },
2716 { 2, 0, 3, 0, 4, "Delta:4" },
2717 // { 2, 0, 3, 0, 4, "Delta:8" },
2718 // { 2, 0, 3, 0, 4, "Delta:32" },
2719
2720 { 2, 0, 2, 0, 2, "BCJ" },
2721 { 2, 0, 1, 0, 1, "ARM64" },
2722 { 2, 0, 1, 0, 1, "RISCV" },
2723
2724 // { 10, 0, 18, 0, 18, "AES128CBC:1" },
2725 // { 10, 0, 21, 0, 21, "AES192CBC:1" },
2726 { 10, 0, 24, 0, 24, "AES256CBC:1" },
2727
2728 // { 10, 0, 18, 0, 18, "AES128CTR:1" },
2729 // { 10, 0, 21, 0, 21, "AES192CTR:1" },
2730 // { 10, 0, 24, 0, 24, "AES256CTR:1" },
2731 // { 2, 0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
2732 // { 2, 0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
2733 { 2, 0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },
2734
2735 // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
2736 // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
2737 // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },
2738
2739 // { 1, 0, CMPLX(6), 0, -2, "AES128CBC:3" },
2740 // { 1, 0, CMPLX(7), 0, -2, "AES192CBC:3" },
2741 { 1, 0, CMPLX(8), 0, -2, "AES256CBC:3" }
2742
2743 // { 1, 0, CMPLX(1), 0, -2, "AES128CTR:3" },
2744 // { 1, 0, CMPLX(1), 0, -2, "AES192CTR:3" },
2745 // { 1, 0, CMPLX(1), 0, -2, "AES256CTR:3" },
2746 };
2747
2748 struct CBenchHash
2749 {
2750 unsigned Weight;
2751 UInt32 Complex;
2752 UInt32 CheckSum;
2753 const char *Name;
2754 };
2755
2756 // #define ARM_CRC_MUL 100
2757 #define ARM_CRC_MUL 1
2758
2759 #define k_Hash_Complex_Mult 256
2760
2761 static const CBenchHash g_Hash[] =
2762 {
2763 { 20, 256, 0x21e207bb, "CRC32:12" } ,
2764 { 2, 128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
2765 { 2, 64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
2766 { 10, 256, 0x41b901d1, "CRC64" },
2767 { 5, 64, 0x43eac94f, "XXH64" },
2768 { 2, 2340, 0x3398a904, "MD5" },
2769 { 10, 2340, 0xff769021, "SHA1:1" },
2770 { 2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },
2771 { 10, 5100, 0x7913ba03, "SHA256:1" },
2772 { 2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },
2773 { 5, 3200, 0xe7aeb394, "SHA512:1" },
2774 { 2, CMPLX((40 * 4 + 1) * 4 + 4), 0xe7aeb394, "SHA512:2" },
2775 // { 10, 3428, 0x1cc99b18, "SHAKE128" },
2776 // { 10, 4235, 0x74eaddc3, "SHAKE256" },
2777 // { 10, 4000, 0xdf3e6863, "SHA3-224" },
2778 { 5, 4200, 0xcecac10d, "SHA3-256" },
2779 // { 10, 5538, 0x4e5d9163, "SHA3-384" },
2780 // { 10, 8000, 0x96a58289, "SHA3-512" },
2781 { 2, 4096, 0x85189d02, "BLAKE2sp:1" },
2782 { 2, 1024, 0x85189d02, "BLAKE2sp:2" }, // sse2-way4-fast
2783 { 2, 512, 0x85189d02, "BLAKE2sp:3" } // avx2-way8-fast
2784 #if 0
2785 , { 2, 2048, 0x85189d02, "BLAKE2sp:4" } // sse2-way1
2786 , { 2, 1024, 0x85189d02, "BLAKE2sp:5" } // sse2-way2
2787 , { 2, 1024, 0x85189d02, "BLAKE2sp:6" } // avx2-way2
2788 , { 2, 1024, 0x85189d02, "BLAKE2sp:7" } // avx2-way4
2789 #endif
2790 };
2791
2792 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
2793 {
2794 char s[128];
2795 unsigned startPos = (unsigned)sizeof(s) - 32;
2796 memset(s, ' ', startPos);
2797 ConvertUInt64ToString(value, s + startPos);
2798 // if (withSpace)
2799 {
2800 startPos--;
2801 size++;
2802 }
2803 unsigned len = (unsigned)strlen(s + startPos);
2804 if (size > len)
2805 {
2806 size -= len;
2807 if (startPos < size)
2808 startPos = 0;
2809 else
2810 startPos -= size;
2811 }
2812 f.Print(s + startPos);
2813 }
2814
// Widths (in characters) of the columns of benchmark tables
static const unsigned kFieldSize_Name      = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed     = 9;
static const unsigned kFieldSize_Usage     = 5;
static const unsigned kFieldSize_RU        = 6;
static const unsigned kFieldSize_Rating    = 6;
static const unsigned kFieldSize_EU        = 5;
static const unsigned kFieldSize_Effec     = 5;
static const unsigned kFieldSize_CrcSpeed  = 8;


// Combined widths: the constant term is 1 per included column
// (PrintNumber() prints one space before each field)
static const unsigned kFieldSize_TotalSize =
    4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec =
    2 + kFieldSize_EU + kFieldSize_Effec;
2828
2829
2830 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
2831 {
2832 PrintNumber(f, (rating + 500000) / 1000000, size);
2833 }
2834
2835
2836 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
2837 {
2838 UInt64 v = 0;
2839 if (divider != 0)
2840 v = (val * 100 + divider / 2) / divider;
2841 PrintNumber(f, v, size);
2842 }
2843
2844 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
2845 {
2846 char s[256];
2847 memset(s, (Byte)c, size);
2848 s[size] = 0;
2849 f.Print(s);
2850 }
2851
2852 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
2853 {
2854 PrintChars(f, ' ', size);
2855 }
2856
2857 static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
2858 {
2859 PrintNumber(f, Benchmark_GetUsage_Percents(usage), size);
2860 }
2861
2862 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
2863 {
2864 PrintUsage(f, usage, kFieldSize_Usage);
2865 PrintRating(f, rpu, kFieldSize_RU);
2866 PrintRating(f, rating, kFieldSize_Rating);
2867 if (showFreq)
2868 {
2869 if (cpuFreq == 0)
2870 PrintSpaces(f, kFieldSize_EUAndEffec);
2871 else
2872 {
2873 PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU);
2874 PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
2875 }
2876 }
2877 }
2878
2879
2880 void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info)
2881 {
2882 Speed = info.GetUnpackSizeSpeed();
2883 Usage = info.GetUsage();
2884 RPU = info.GetRatingPerUsage(Rating);
2885 }
2886
2887 void CTotalBenchRes::Mult_For_Weight(unsigned weight)
2888 {
2889 NumIterations2 *= weight;
2890 RPU *= weight;
2891 Rating *= weight;
2892 Usage *= weight;
2893 Speed *= weight;
2894 }
2895
2896 void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r)
2897 {
2898 Rating += r.Rating;
2899 Usage += r.Usage;
2900 RPU += r.RPU;
2901 Speed += r.Speed;
2902 // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
2903 NumIterations2 += r.NumIterations2;
2904 }
2905
2906 static void PrintResults(IBenchPrintCallback *f,
2907 const CBenchInfo &info,
2908 unsigned weight,
2909 UInt64 rating,
2910 bool showFreq, UInt64 cpuFreq,
2911 CTotalBenchRes *res)
2912 {
2913 CTotalBenchRes t;
2914 t.Rating = rating;
2915 t.NumIterations2 = 1;
2916 t.Generate_From_BenchInfo(info);
2917
2918 if (f)
2919 {
2920 if (t.Speed != 0)
2921 PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed);
2922 else
2923 PrintSpaces(*f, 1 + kFieldSize_Speed);
2924 }
2925 if (f)
2926 {
2927 PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq);
2928 }
2929
2930 if (res)
2931 {
2932 // res->NumIterations1++;
2933 t.Mult_For_Weight(weight);
2934 res->Update_With_Res(t);
2935 }
2936 }
2937
2938 static void PrintTotals(IBenchPrintCallback &f,
2939 bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
2940 {
2941 const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1;
2942 const UInt64 speed = res.Speed / numIterations2;
2943 if (showSpeed && speed != 0)
2944 PrintNumber(f, speed / 1024, kFieldSize_Speed);
2945 else
2946 PrintSpaces(f, 1 + kFieldSize_Speed);
2947
2948 // PrintSpaces(f, 1 + kFieldSize_Speed);
2949 // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
2950 PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
2951 }
2952
2953
2954 static void PrintHex(AString &s, UInt64 v)
2955 {
2956 char temp[32];
2957 ConvertUInt64ToHex(v, temp);
2958 s += temp;
2959 }
2960
2961 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
2962 {
2963 AString s;
2964 // s.Add_UInt32(ti.numProcessThreads);
2965 unsigned numSysThreads = ti.GetNumSystemThreads();
2966 if (ti.GetNumProcessThreads() != numSysThreads)
2967 {
2968 // if (ti.numProcessThreads != ti.numSysThreads)
2969 {
2970 s += " / ";
2971 s.Add_UInt32(numSysThreads);
2972 }
2973 s += " : ";
2974 #ifdef _WIN32
2975 PrintHex(s, ti.processAffinityMask);
2976 s += " / ";
2977 PrintHex(s, ti.systemAffinityMask);
2978 #else
2979 unsigned i = (numSysThreads + 3) & ~(unsigned)3;
2980 if (i == 0)
2981 i = 4;
2982 for (; i >= 4; )
2983 {
2984 i -= 4;
2985 unsigned val = 0;
2986 for (unsigned k = 0; k < 4; k++)
2987 {
2988 const unsigned bit = (ti.IsCpuSet(i + k) ? 1 : 0);
2989 val += (bit << k);
2990 }
2991 PrintHex(s, val);
2992 }
2993 #endif
2994 }
2995 return s;
2996 }
2997
2998
#ifdef Z7_LARGE_PAGES

#ifdef _WIN32
// these variables are defined in other source files
extern bool g_LargePagesMode;
extern "C"
{
  extern SIZE_T g_LargePageSize;
}
#endif

/*
  Appends the large pages description to (s):
    "(LP-" + size [+ "-1G"] [+ "-NA"] + ")"
  _WIN32: nothing is appended, if large pages mode is off and
          g_LargePageSize is zero. "-NA" is added, if g_LargePagesMode is off.
  non-_WIN32: nothing is appended.
*/
void Add_LargePages_String(AString &s)
{
  #ifdef _WIN32
  const bool needInfo = (g_LargePagesMode || g_LargePageSize != 0);
  if (needInfo)
  {
    s.Add_OptSpaced("(LP-");
    PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
    #ifdef MY_CPU_X86_OR_AMD64
    if (CPU_IsSupported_PageGB())
      s += "-1G";
    #endif
    if (!g_LargePagesMode)
      s += "-NA";
    s += ")";
  }
  #else
  s += "";
  #endif
}

#endif
3030
3031
3032
3033 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
3034 bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
3035 {
3036 f.Print("RAM ");
3037 f.Print(sizeString);
3038 if (size_Defined)
3039 PrintNumber(f, (size >> 20), 6);
3040 else
3041 f.Print(" ?");
3042 f.Print(" MB");
3043
3044 #ifdef Z7_LARGE_PAGES
3045 {
3046 AString s;
3047 Add_LargePages_String(s);
3048 f.Print(s);
3049 }
3050 #endif
3051
3052 f.Print(", # ");
3053 f.Print(threadsString);
3054 PrintNumber(f, numThreads, 3);
3055 }
3056
3057
3058
3059 struct CBenchCallbackToPrint Z7_final: public IBenchCallback
3060 {
3061 bool NeedPrint;
3062 bool Use2Columns;
3063 bool ShowFreq;
3064 unsigned NameFieldSize;
3065
3066 unsigned EncodeWeight;
3067 unsigned DecodeWeight;
3068
3069 UInt64 CpuFreq;
3070 UInt64 DictSize;
3071
3072 IBenchPrintCallback *_file;
3073 CBenchProps BenchProps;
3074 CTotalBenchRes EncodeRes;
3075 CTotalBenchRes DecodeRes;
3076
3077 CBenchInfo BenchInfo_Results[2];
3078
3079 CBenchCallbackToPrint():
3080 NeedPrint(true),
3081 Use2Columns(false),
3082 ShowFreq(false),
3083 NameFieldSize(0),
3084 EncodeWeight(1),
3085 DecodeWeight(1),
3086 CpuFreq(0)
3087 {}
3088
3089 void Init() { EncodeRes.Init(); DecodeRes.Init(); }
3090 void Print(const char *s);
3091 void NewLine();
3092
3093 HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
3094 HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
3095 HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
3096 };
3097
3098 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
3099 {
3100 ShowFreq = showFreq;
3101 CpuFreq = cpuFreq;
3102 return S_OK;
3103 }
3104
3105 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
3106 {
3107 RINOK(_file->CheckBreak())
3108 if (final)
3109 BenchInfo_Results[0] = info;
3110 if (final)
3111 if (NeedPrint)
3112 {
3113 const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
3114 PrintResults(_file, info,
3115 EncodeWeight, rating,
3116 ShowFreq, CpuFreq, &EncodeRes);
3117 if (!Use2Columns)
3118 _file->NewLine();
3119 }
3120 return S_OK;
3121 }
3122
3123 static const char * const kSep = " | ";
3124
3125 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
3126 {
3127 RINOK(_file->CheckBreak())
3128 if (final)
3129 BenchInfo_Results[1] = info;
3130 if (final)
3131 if (NeedPrint)
3132 {
3133 const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
3134 if (Use2Columns)
3135 _file->Print(kSep);
3136 else
3137 PrintSpaces(*_file, NameFieldSize);
3138 CBenchInfo info2 = info;
3139 info2.UnpackSize *= info2.NumIterations;
3140 info2.PackSize *= info2.NumIterations;
3141 info2.NumIterations = 1;
3142 PrintResults(_file, info2,
3143 DecodeWeight, rating,
3144 ShowFreq, CpuFreq, &DecodeRes);
3145 }
3146 return S_OK;
3147 }
3148
3149 void CBenchCallbackToPrint::Print(const char *s)
3150 {
3151 _file->Print(s);
3152 }
3153
3154 void CBenchCallbackToPrint::NewLine()
3155 {
3156 _file->NewLine();
3157 }
3158
3159 static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
3160 {
3161 f.Print(s);
3162 int numSpaces = (int)size - (int)MyStringLen(s);
3163 if (numSpaces > 0)
3164 PrintSpaces(f, (unsigned)numSpaces);
3165 }
3166
3167 static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
3168 {
3169 int numSpaces = (int)size - (int)MyStringLen(s);
3170 if (numSpaces > 0)
3171 PrintSpaces(f, (unsigned)numSpaces);
3172 f.Print(s);
3173 }
3174
3175
3176 static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name)
3177 {
3178 UString wildc = GetUnicodeString(mask);
3179 UString bname = GetUnicodeString(name);
3180 wildc.MakeLower_Ascii();
3181 bname.MakeLower_Ascii();
3182 return DoesWildcardMatchName(wildc, bname);
3183 }
3184
3185
3186 static HRESULT TotalBench(
3187 DECL_EXTERNAL_CODECS_LOC_VARS
3188 const COneMethodInfo &methodMask,
3189 UInt64 complexInCommands,
3190 #ifndef Z7_ST
3191 UInt32 numThreads,
3192 const CAffinityMode *affinityMode,
3193 #endif
3194 bool forceUnpackSize,
3195 size_t unpackSize,
3196 const Byte *fileData,
3197 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
3198 {
3199 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
3200 {
3201 const CBenchMethod &bench = g_Bench[i];
3202 if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3203 continue;
3204 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3205 {
3206 unsigned keySize = 32;
3207 if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16;
3208 else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24;
3209 callback->BenchProps.KeySize = keySize;
3210 }
3211 callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3212 callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3213 callback->BenchProps.EncComplex = bench.EncComplex;
3214
3215 COneMethodInfo method;
3216 NCOM::CPropVariant propVariant;
3217 propVariant = bench.Name;
3218 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3219
3220 size_t unpackSize2 = unpackSize;
3221 if (!forceUnpackSize && bench.DictBits == 0)
3222 unpackSize2 = kFilterUnpackSize;
3223
3224 callback->EncodeWeight = bench.Weight;
3225 callback->DecodeWeight = bench.Weight;
3226
3227 const HRESULT res = MethodBench(
3228 EXTERNAL_CODECS_LOC_VARS
3229 complexInCommands,
3230 #ifndef Z7_ST
3231 false, numThreads, affinityMode,
3232 #endif
3233 method,
3234 unpackSize2, fileData,
3235 bench.DictBits,
3236 printCallback, callback, &callback->BenchProps);
3237
3238 if (res == E_NOTIMPL)
3239 {
3240 // callback->Print(" ---");
3241 // we need additional empty line as line for decompression results
3242 if (!callback->Use2Columns)
3243 callback->NewLine();
3244 }
3245 else
3246 {
3247 RINOK(res)
3248 }
3249
3250 callback->NewLine();
3251 }
3252 return S_OK;
3253 }
3254
3255
3256 struct CFreqBench
3257 {
3258 // in:
3259 UInt64 complexInCommands;
3260 UInt32 numThreads;
3261 bool showFreq;
3262 UInt64 specifiedFreq;
3263
3264 // out:
3265 UInt64 CpuFreqRes;
3266 UInt64 UsageRes;
3267 UInt32 res;
3268
3269 CFreqBench()
3270 {}
3271
3272 HRESULT FreqBench(IBenchPrintCallback *_file
3273 #ifndef Z7_ST
3274 , const CAffinityMode *affinityMode
3275 #endif
3276 );
3277 };
3278
3279
3280 HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
3281 #ifndef Z7_ST
3282 , const CAffinityMode *affinityMode
3283 #endif
3284 )
3285 {
3286 res = 0;
3287 CpuFreqRes = 0;
3288 UsageRes = 0;
3289
3290 if (numThreads == 0)
3291 numThreads = 1;
3292
3293 #ifdef Z7_ST
3294 numThreads = 1;
3295 #endif
3296
3297 const UInt32 complexity = kNumFreqCommands;
3298 UInt64 numIterations = complexInCommands / complexity;
3299 UInt32 numIterations2 = 1 << 30;
3300 if (numIterations > numIterations2)
3301 numIterations /= numIterations2;
3302 else
3303 {
3304 numIterations2 = (UInt32)numIterations;
3305 numIterations = 1;
3306 }
3307
3308 CBenchInfoCalc progressInfoSpec;
3309
3310 #ifndef Z7_ST
3311
3312 bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity();
3313
3314 if (mtMode)
3315 {
3316 CFreqThreads threads;
3317 threads.Items = new CFreqInfo[numThreads];
3318 UInt32 i;
3319 for (i = 0; i < numThreads; i++)
3320 {
3321 CFreqInfo &info = threads.Items[i];
3322 info.Callback = _file;
3323 info.CallbackRes = S_OK;
3324 info.NumIterations = numIterations;
3325 info.Size = numIterations2;
3326 }
3327 progressInfoSpec.SetStartTime();
3328 for (i = 0; i < numThreads; i++)
3329 {
3330 // Sleep(10);
3331 CFreqInfo &info = threads.Items[i];
3332 WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
3333 if (info.Thread.IsCreated())
3334 threads.NumThreads++;
3335 if (wres != 0)
3336 return HRESULT_FROM_WIN32(wres);
3337 }
3338 WRes wres = threads.WaitAll();
3339 if (wres != 0)
3340 return HRESULT_FROM_WIN32(wres);
3341 for (i = 0; i < numThreads; i++)
3342 {
3343 RINOK(threads.Items[i].CallbackRes)
3344 }
3345 }
3346 else
3347 #endif
3348 {
3349 progressInfoSpec.SetStartTime();
3350 UInt32 sum = g_BenchCpuFreqTemp;
3351 UInt64 k = numIterations;
3352 do
3353 {
3354 sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
3355 if (_file)
3356 {
3357 RINOK(_file->CheckBreak())
3358 }
3359 }
3360 while (--k);
3361 res += sum;
3362 }
3363
3364 if (res == 0x12345678)
3365 if (_file)
3366 {
3367 RINOK(_file->CheckBreak())
3368 }
3369
3370 CBenchInfo info;
3371 progressInfoSpec.SetFinishTime(info);
3372
3373 info.UnpackSize = 0;
3374 info.PackSize = 0;
3375 info.NumIterations = 1;
3376
3377 const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
3378 const UInt64 rating = info.GetSpeed(numCommands);
3379 CpuFreqRes = rating / numThreads;
3380 UsageRes = info.GetUsage();
3381
3382 if (_file)
3383 {
3384 PrintResults(_file, info,
3385 0, // weight
3386 rating,
3387 showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
3388 RINOK(_file->CheckBreak())
3389 }
3390
3391 return S_OK;
3392 }
3393
3394
3395
3396 static HRESULT CrcBench(
3397 DECL_EXTERNAL_CODECS_LOC_VARS
3398 UInt64 complexInCommands,
3399 UInt32 numThreads,
3400 const size_t bufferSize,
3401 const Byte *fileData,
3402
3403 UInt64 &speed,
3404 UInt64 &usage,
3405
3406 UInt32 complexity, unsigned benchWeight,
3407 const UInt32 *checkSum,
3408 const COneMethodInfo &method,
3409 IBenchPrintCallback *_file,
3410 #ifndef Z7_ST
3411 const CAffinityMode *affinityMode,
3412 #endif
3413 bool showRating,
3414 CTotalBenchRes *encodeRes,
3415 bool showFreq, UInt64 cpuFreq)
3416 {
3417 if (numThreads == 0)
3418 numThreads = 1;
3419
3420 #ifdef Z7_ST
3421 numThreads = 1;
3422 #endif
3423
3424 const AString &methodName = method.MethodName;
3425 // methodName.RemoveChar(L'-');
3426 CMethodId hashID;
3427 if (!FindHashMethod(
3428 EXTERNAL_CODECS_LOC_VARS
3429 methodName, hashID))
3430 return E_NOTIMPL;
3431
3432 /*
3433 // if will generate random data in each thread, instead of global data
3434 CMidAlignedBuffer buffer;
3435 if (!fileData)
3436 {
3437 ALLOC_WITH_HRESULT(&buffer, bufferSize)
3438 RandGen(buffer, bufferSize);
3439 fileData = buffer;
3440 }
3441 */
3442
3443 const size_t bsize = (bufferSize == 0 ? 1 : bufferSize);
3444 UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
3445 if (numIterations == 0)
3446 numIterations = 1;
3447
3448 CBenchInfoCalc progressInfoSpec;
3449 CBenchInfo info;
3450
3451 #ifndef Z7_ST
3452 bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();
3453
3454 if (mtEncMode)
3455 {
3456 CCrcThreads threads;
3457 threads.Items = new CCrcInfo[numThreads];
3458 {
3459 WRes wres = threads.Common.StartEvent.Create();
3460 if (wres != 0)
3461 return HRESULT_FROM_WIN32(wres);
3462 threads.NeedClose = true;
3463 }
3464
3465 UInt32 i;
3466 for (i = 0; i < numThreads; i++)
3467 {
3468 CCrcInfo &ci = threads.Items[i];
3469 AString name;
3470 RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
3471 if (!ci.Hasher)
3472 return E_NOTIMPL;
3473 CMyComPtr<ICompressSetCoderProperties> scp;
3474 ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3475 if (scp)
3476 {
3477 RINOK(method.SetCoderProps(scp))
3478 }
3479
3480 ci.Callback = _file;
3481 ci.Data = fileData;
3482 ci.NumIterations = numIterations;
3483 ci.Size = bufferSize;
3484 ci.CheckSumDefined = false;
3485 if (checkSum)
3486 {
3487 ci.CheckSum = *checkSum;
3488 ci.CheckSumDefined = true;
3489 }
3490
3491 #ifdef USE_ALLOCA
3492 ci.AllocaSize = BENCH_ALLOCA_VALUE(i);
3493 #endif
3494 }
3495
3496 for (i = 0; i < numThreads; i++)
3497 {
3498 CCrcInfo &ci = threads.Items[i];
3499 ci.ThreadIndex = i;
3500 ci.Common = &threads.Common;
3501 ci.AffinityMode = *affinityMode;
3502 HRESULT hres = ci.CreateThread();
3503 if (ci.Thread.IsCreated())
3504 threads.NumThreads++;
3505 if (hres != 0)
3506 return hres;
3507 }
3508
3509 for (i = 0; i < numThreads; i++)
3510 {
3511 CCrcInfo &ci = threads.Items[i];
3512 WRes wres = ci.ReadyEvent.Lock();
3513 if (wres != 0)
3514 return HRESULT_FROM_WIN32(wres);
3515 RINOK(ci.Res)
3516 }
3517
3518 progressInfoSpec.SetStartTime();
3519
3520 WRes wres = threads.StartAndWait();
3521 if (wres != 0)
3522 return HRESULT_FROM_WIN32(wres);
3523
3524 progressInfoSpec.SetFinishTime(info);
3525
3526 for (i = 0; i < numThreads; i++)
3527 {
3528 RINOK(threads.Items[i].Res)
3529 if (i != 0)
3530 if (threads.Items[i].CheckSum_Res !=
3531 threads.Items[i - 1].CheckSum_Res)
3532 return S_FALSE;
3533 }
3534 }
3535 else
3536 #endif
3537 {
3538 CMyComPtr<IHasher> hasher;
3539 AString name;
3540 RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
3541 if (!hasher)
3542 return E_NOTIMPL;
3543 CMyComPtr<ICompressSetCoderProperties> scp;
3544 hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3545 if (scp)
3546 {
3547 RINOK(method.SetCoderProps(scp))
3548 }
3549 CCrcInfo_Base crcib;
3550 crcib.CreateLocalBuf = false;
3551 RINOK(crcib.Generate(fileData, bufferSize))
3552 progressInfoSpec.SetStartTime();
3553 RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
3554 progressInfoSpec.SetFinishTime(info);
3555 }
3556
3557
3558 UInt64 unpSize = numIterations * bufferSize;
3559 UInt64 unpSizeThreads = unpSize * numThreads;
3560 info.UnpackSize = unpSizeThreads;
3561 info.PackSize = unpSizeThreads;
3562 info.NumIterations = 1;
3563
3564 if (_file)
3565 {
3566 if (showRating)
3567 {
3568 UInt64 unpSizeThreads2 = unpSizeThreads;
3569 if (unpSizeThreads2 == 0)
3570 unpSizeThreads2 = numIterations * 1 * numThreads;
3571 const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
3572 const UInt64 rating = info.GetSpeed(numCommands);
3573 PrintResults(_file, info,
3574 benchWeight, rating,
3575 showFreq, cpuFreq, encodeRes);
3576 }
3577 RINOK(_file->CheckBreak())
3578 }
3579
3580 speed = info.GetSpeed(unpSizeThreads);
3581 usage = info.GetUsage();
3582
3583 return S_OK;
3584 }
3585
3586
3587
3588 static HRESULT TotalBench_Hash(
3589 DECL_EXTERNAL_CODECS_LOC_VARS
3590 const COneMethodInfo &methodMask,
3591 UInt64 complexInCommands,
3592 UInt32 numThreads,
3593 size_t bufSize,
3594 const Byte *fileData,
3595 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
3596 #ifndef Z7_ST
3597 const CAffinityMode *affinityMode,
3598 #endif
3599 CTotalBenchRes *encodeRes,
3600 bool showFreq, UInt64 cpuFreq)
3601 {
3602 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
3603 {
3604 const CBenchHash &bench = g_Hash[i];
3605 if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3606 continue;
3607 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3608 // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3609 // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3610 // callback->BenchProps.EncComplex = bench.EncComplex;
3611
3612 COneMethodInfo method;
3613 NCOM::CPropVariant propVariant;
3614 propVariant = bench.Name;
3615 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3616
3617 UInt64 speed, usage;
3618
3619 const HRESULT res = CrcBench(
3620 EXTERNAL_CODECS_LOC_VARS
3621 complexInCommands,
3622 numThreads, bufSize, fileData,
3623 speed, usage,
3624 bench.Complex, bench.Weight,
3625 (!fileData && bufSize == (1 << kNumHashDictBits)) ? &bench.CheckSum : NULL,
3626 method,
3627 printCallback,
3628 #ifndef Z7_ST
3629 affinityMode,
3630 #endif
3631 true, // showRating
3632 encodeRes, showFreq, cpuFreq);
3633 if (res == E_NOTIMPL)
3634 {
3635 // callback->Print(" ---");
3636 }
3637 else
3638 {
3639 RINOK(res)
3640 }
3641 callback->NewLine();
3642 }
3643 return S_OK;
3644 }
3645
3646 struct CTempValues
3647 {
3648 UInt64 *Values;
3649 CTempValues(): Values(NULL) {}
3650 void Alloc(UInt32 num) { Values = new UInt64[num]; }
3651 ~CTempValues() { delete []Values; }
3652 };
3653
3654 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
3655 {
3656 const wchar_t *end;
3657 UInt64 result = ConvertStringToUInt64(s, &end);
3658 if (*end != 0 || s.IsEmpty())
3659 prop = s;
3660 else if (result <= (UInt32)0xFFFFFFFF)
3661 prop = (UInt32)result;
3662 else
3663 prop = result;
3664 }
3665
3666
3667 static bool AreSameMethodNames(const char *fullName, const char *shortName)
3668 {
3669 return StringsAreEqualNoCase_Ascii(fullName, shortName);
3670 }
3671
3672
3673
3674
3675 static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads)
3676 {
3677 PrintRequirements(f, "usage:", true, usage, "Benchmark threads: ", threads);
3678 }
3679
3680
3681 static void Print_Delimiter(IBenchPrintCallback &f)
3682 {
3683 f.Print(" |");
3684 }
3685
3686 static void Print_Pow(IBenchPrintCallback &f, unsigned pow)
3687 {
3688 char s[16];
3689 ConvertUInt32ToString(pow, s);
3690 unsigned pos = MyStringLen(s);
3691 s[pos++] = ':';
3692 s[pos] = 0;
3693 PrintLeft(f, s, kFieldSize_SmallName); // 4
3694 }
3695
3696 static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f,
3697 UInt64 usage, UInt64 speed)
3698 {
3699 PrintUsage(f, usage, kFieldSize_Usage);
3700 PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed);
3701 }
3702
3703
3704 HRESULT Bench(
3705 DECL_EXTERNAL_CODECS_LOC_VARS
3706 IBenchPrintCallback *printCallback,
3707 IBenchCallback *benchCallback,
3708 const CObjectVector<CProperty> &props,
3709 UInt32 numIterations,
3710 bool multiDict,
3711 IBenchFreqCallback *freqCallback)
3712 {
3713 // for (int y = 0; y < 10000; y++)
3714 if (!CrcInternalTest())
3715 return E_FAIL;
3716
3717 UInt32 numCPUs = 1;
3718 size_t ramSize = (size_t)sizeof(size_t) << 29;
3719
3720 NSystem::CProcessAffinity threadsInfo;
3721 threadsInfo.InitST();
3722
3723 #ifndef Z7_ST
3724
3725 if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0)
3726 numCPUs = threadsInfo.GetNumProcessThreads();
3727 else
3728 numCPUs = NSystem::GetNumberOfProcessors();
3729
3730 #endif
3731
3732 // numCPUs = 24;
3733 /*
3734 {
3735 DWORD_PTR mask = (1 << 0);
3736 DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask);
3737 old = old;
3738 DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask);
3739 old2 = old2;
3740 return 0;
3741 }
3742 */
3743
3744 const bool ramSize_Defined = NSystem::GetRamSize(ramSize);
3745
3746 UInt32 numThreadsSpecified = numCPUs;
3747 bool needSetComplexity = false;
3748 UInt32 testTimeMs = kComplexInMs;
3749 UInt32 startDicLog = 22;
3750 bool startDicLog_Defined = false;
3751 UInt64 specifiedFreq = 0;
3752 bool multiThreadTests = false;
3753 UInt64 complexInCommands = kComplexInCommands;
3754 UInt32 numThreads_Start = 1;
3755
3756 #ifndef Z7_ST
3757 CAffinityMode affinityMode;
3758 #endif
3759
3760
3761 COneMethodInfo method;
3762
3763 CMidAlignedBuffer fileDataBuffer;
3764 bool use_fileData = false;
3765 bool isFixedDict = false;
3766
3767 {
3768 unsigned i;
3769
3770 if (printCallback)
3771 {
3772 for (i = 0; i < props.Size(); i++)
3773 {
3774 const CProperty &property = props[i];
3775 printCallback->Print(" ");
3776 printCallback->Print(GetAnsiString(property.Name));
3777 if (!property.Value.IsEmpty())
3778 {
3779 printCallback->Print("=");
3780 printCallback->Print(GetAnsiString(property.Value));
3781 }
3782 }
3783 if (!props.IsEmpty())
3784 printCallback->NewLine();
3785 }
3786
3787
3788 for (i = 0; i < props.Size(); i++)
3789 {
3790 const CProperty &property = props[i];
3791 UString name (property.Name);
3792 name.MakeLower_Ascii();
3793
3794 if (name.IsEqualTo("file"))
3795 {
3796 if (property.Value.IsEmpty())
3797 return E_INVALIDARG;
3798
3799 NFile::NIO::CInFile file;
3800 if (!file.Open(us2fs(property.Value)))
3801 return GetLastError_noZero_HRESULT();
3802 size_t len;
3803 {
3804 UInt64 len64;
3805 if (!file.GetLength(len64))
3806 return GetLastError_noZero_HRESULT();
3807 if (printCallback)
3808 {
3809 printCallback->Print("file size =");
3810 PrintNumber(*printCallback, len64, 0);
3811 printCallback->NewLine();
3812 }
3813 len = (size_t)len64;
3814 if (len != len64)
3815 return E_INVALIDARG;
3816 }
3817
3818 // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
3819
3820 ALLOC_WITH_HRESULT(&fileDataBuffer, len)
3821 use_fileData = true;
3822
3823 {
3824 size_t processed;
3825 if (!file.ReadFull((Byte *)fileDataBuffer, len, processed))
3826 return GetLastError_noZero_HRESULT();
3827 if (processed != len)
3828 return E_FAIL;
3829 }
3830 continue;
3831 }
3832
3833 NCOM::CPropVariant propVariant;
3834 if (!property.Value.IsEmpty())
3835 ParseNumberString(property.Value, propVariant);
3836
3837 if (name.IsEqualTo("time"))
3838 {
3839 RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3840 needSetComplexity = true;
3841 testTimeMs *= 1000;
3842 continue;
3843 }
3844
3845 if (name.IsEqualTo("timems"))
3846 {
3847 RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3848 needSetComplexity = true;
3849 continue;
3850 }
3851
3852 if (name.IsEqualTo("tic"))
3853 {
3854 UInt32 v;
3855 RINOK(ParsePropToUInt32(UString(), propVariant, v))
3856 if (v >= 64)
3857 return E_INVALIDARG;
3858 complexInCommands = (UInt64)1 << v;
3859 continue;
3860 }
3861
3862 const bool isCurrent_fixedDict = name.IsEqualTo("df");
3863 if (isCurrent_fixedDict)
3864 isFixedDict = true;
3865 if (isCurrent_fixedDict || name.IsEqualTo("ds"))
3866 {
3867 RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog))
3868 if (startDicLog > 32)
3869 return E_INVALIDARG;
3870 startDicLog_Defined = true;
3871 continue;
3872 }
3873
3874 if (name.IsEqualTo("mts"))
3875 {
3876 RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start))
3877 continue;
3878 }
3879
3880 if (name.IsEqualTo("af"))
3881 {
3882 UInt32 bundle;
3883 RINOK(ParsePropToUInt32(UString(), propVariant, bundle))
3884 if (bundle > 0 && bundle < numCPUs)
3885 {
3886 #ifndef Z7_ST
3887 affinityMode.SetLevels(numCPUs, 2);
3888 affinityMode.NumBundleThreads = bundle;
3889 #endif
3890 }
3891 continue;
3892 }
3893
3894 if (name.IsEqualTo("freq"))
3895 {
3896 UInt32 freq32 = 0;
3897 RINOK(ParsePropToUInt32(UString(), propVariant, freq32))
3898 if (freq32 == 0)
3899 return E_INVALIDARG;
3900 specifiedFreq = (UInt64)freq32 * 1000000;
3901
3902 if (printCallback)
3903 {
3904 printCallback->Print("freq=");
3905 PrintNumber(*printCallback, freq32, 0);
3906 printCallback->NewLine();
3907 }
3908
3909 continue;
3910 }
3911
3912 if (name.IsPrefixedBy_Ascii_NoCase("mt"))
3913 {
3914 const UString s = name.Ptr(2);
3915 if (s.IsEqualTo("*")
3916 || (s.IsEmpty()
3917 && propVariant.vt == VT_BSTR
3918 && StringsAreEqual_Ascii(propVariant.bstrVal, "*")))
3919 {
3920 multiThreadTests = true;
3921 continue;
3922 }
3923 #ifndef Z7_ST
3924 RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified))
3925 #endif
3926 continue;
3927 }
3928
3929 RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant))
3930 }
3931 }
3932
3933 if (printCallback)
3934 {
3935 AString s;
3936
3937 #if 1 || !defined(Z7_MSC_VER_ORIGINAL) || (Z7_MSC_VER_ORIGINAL >= 1900)
3938 s += "Compiler: ";
3939 GetCompiler(s);
3940 printCallback->Print(s);
3941 printCallback->NewLine();
3942 s.Empty();
3943 #endif
3944
3945 GetSystemInfoText(s);
3946 printCallback->Print(s);
3947 printCallback->NewLine();
3948 }
3949
3950 if (printCallback)
3951 {
3952 printCallback->Print("1T CPU Freq (MHz):");
3953 }
3954
3955 if (printCallback || freqCallback)
3956 {
3957 UInt64 numMilCommands = 1 << 6;
3958 if (specifiedFreq != 0)
3959 {
3960 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3961 numMilCommands >>= 1;
3962 }
3963
3964 for (int jj = 0;; jj++)
3965 {
3966 if (printCallback)
3967 RINOK(printCallback->CheckBreak())
3968
3969 UInt64 start = ::GetTimeCount();
3970 UInt32 sum = (UInt32)start;
3971 sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3972 if (sum == 0xF1541213)
3973 if (printCallback)
3974 printCallback->Print("");
3975 const UInt64 realDelta = ::GetTimeCount() - start;
3976 start = realDelta;
3977 if (start == 0)
3978 start = 1;
3979 if (start > (UInt64)1 << 61)
3980 start = 1;
3981 const UInt64 freq = GetFreq();
3982 // mips is constant in some compilers
3983 const UInt64 hzVal = MyMultDiv64(numMilCommands * 1000000, freq, start);
3984 const UInt64 mipsVal = numMilCommands * freq / start;
3985 if (printCallback)
3986 {
3987 if (realDelta == 0)
3988 {
3989 printCallback->Print(" -");
3990 }
3991 else
3992 {
3993 // PrintNumber(*printCallback, start, 0);
3994 PrintNumber(*printCallback, mipsVal, 5);
3995 }
3996 }
3997 if (freqCallback)
3998 {
3999 RINOK(freqCallback->AddCpuFreq(1, hzVal, kBenchmarkUsageMult))
4000 }
4001
4002 if (jj >= 1)
4003 {
4004 bool needStop = (numMilCommands >= (1 <<
4005 #ifdef _DEBUG
4006 7
4007 #else
4008 11
4009 #endif
4010 ));
4011 if (start >= freq * 16)
4012 {
4013 printCallback->Print(" (Cmplx)");
4014 if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
4015 {
4016 needSetComplexity = true;
4017 }
4018 needStop = true;
4019 }
4020 if (needSetComplexity)
4021 SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands);
4022 if (needStop)
4023 break;
4024 numMilCommands <<= 1;
4025 }
4026 }
4027 if (freqCallback)
4028 {
4029 RINOK(freqCallback->FreqsFinished(1))
4030 }
4031 }
4032
4033 if (printCallback || freqCallback)
4034 for (unsigned test = 0; test < 3; test++)
4035 {
4036 if (numThreadsSpecified < 2)
4037 {
4038 // if (test == 1)
4039 break;
4040 }
4041 if (test == 2 && numThreadsSpecified <= numCPUs)
4042 break;
4043 if (printCallback)
4044 printCallback->NewLine();
4045
4046 /* it can show incorrect frequency for HT threads. */
4047
4048 UInt32 numThreads = numThreadsSpecified;
4049 if (test < 2)
4050 {
4051 if (numThreads >= numCPUs)
4052 numThreads = numCPUs;
4053 if (test == 0)
4054 numThreads /= 2;
4055 }
4056 if (numThreads < 1)
4057 numThreads = 1;
4058
4059 if (printCallback)
4060 {
4061 char s[128];
4062 ConvertUInt64ToString(numThreads, s);
4063 printCallback->Print(s);
4064 printCallback->Print("T CPU Freq (MHz):");
4065 }
4066 UInt64 numMilCommands = 1 <<
4067 #ifdef _DEBUG
4068 7;
4069 #else
4070 10;
4071 #endif
4072
4073 if (specifiedFreq != 0)
4074 {
4075 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
4076 numMilCommands >>= 1;
4077 }
4078
4079 // for (int jj = 0;; jj++)
4080 for (;;)
4081 {
4082 if (printCallback)
4083 RINOK(printCallback->CheckBreak())
4084
4085 {
4086 // PrintLeft(f, "CPU", kFieldSize_Name);
4087
4088 // UInt32 resVal;
4089
4090 CFreqBench fb;
4091 fb.complexInCommands = numMilCommands * 1000000;
4092 fb.numThreads = numThreads;
4093 // showFreq;
4094 // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4095 fb.showFreq = true;
4096 fb.specifiedFreq = 1;
4097
4098 const HRESULT res = fb.FreqBench(NULL /* printCallback */
4099 #ifndef Z7_ST
4100 , &affinityMode
4101 #endif
4102 );
4103 RINOK(res)
4104
4105 if (freqCallback)
4106 {
4107 RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes))
4108 }
4109
4110 if (printCallback)
4111 {
4112 /*
4113 if (realDelta == 0)
4114 {
4115 printCallback->Print(" -");
4116 }
4117 else
4118 */
4119 {
4120 // PrintNumber(*printCallback, start, 0);
4121 PrintUsage(*printCallback, fb.UsageRes, 3);
4122 printCallback->Print("%");
4123 PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
4124 printCallback->Print(" ");
4125
4126 // PrintNumber(*printCallback, fb.UsageRes, 5);
4127 }
4128 }
4129 }
4130 // if (jj >= 1)
4131 {
4132 const bool needStop = (numMilCommands >= (1 <<
4133 #ifdef _DEBUG
4134 7
4135 #else
4136 11
4137 #endif
4138 ));
4139 if (needStop)
4140 break;
4141 numMilCommands <<= 1;
4142 }
4143 }
4144 if (freqCallback)
4145 {
4146 RINOK(freqCallback->FreqsFinished(numThreads))
4147 }
4148 }
4149
4150
4151 if (printCallback)
4152 {
4153 printCallback->NewLine();
4154 printCallback->NewLine();
4155 PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
4156 printCallback->Print(GetProcessThreadsInfo(threadsInfo));
4157 printCallback->NewLine();
4158 }
4159
4160 if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
4161 return E_INVALIDARG;
4162
4163 UInt64 dict = (UInt64)1 << startDicLog;
4164 const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
4165
4166 const unsigned level = method.GetLevel();
4167
4168 AString &methodName = method.MethodName;
4169 const AString original_MethodName = methodName;
4170 if (methodName.IsEmpty())
4171 methodName = "LZMA";
4172
4173 if (benchCallback)
4174 {
4175 CBenchProps benchProps;
4176 benchProps.SetLzmaCompexity();
4177 const UInt64 dictSize = method.Get_Lzma_DicSize();
4178
4179 size_t uncompressedDataSize;
4180 if (use_fileData)
4181 {
4182 uncompressedDataSize = fileDataBuffer.Size();
4183 }
4184 else
4185 {
4186 uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
4187 if (uncompressedDataSize < dictSize)
4188 return E_INVALIDARG;
4189 }
4190
4191 return MethodBench(
4192 EXTERNAL_CODECS_LOC_VARS
4193 complexInCommands,
4194 #ifndef Z7_ST
4195 true, numThreadsSpecified,
4196 &affinityMode,
4197 #endif
4198 method,
4199 uncompressedDataSize, (const Byte *)fileDataBuffer,
4200 kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
4201 }
4202
4203 if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
4204 methodName = "crc32";
4205
4206 CMethodId hashID;
4207 const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID);
4208 int codecIndex = -1;
4209 bool isFilter = false;
4210 if (!isHashMethod)
4211 {
4212 UInt32 numStreams;
4213 codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName,
4214 true, // encode
4215 hashID, numStreams, isFilter);
4216 // we can allow non filter for BW tests
4217 if (!isFilter) codecIndex = -1;
4218 }
4219
4220 CBenchCallbackToPrint callback;
4221 callback.Init();
4222 callback._file = printCallback;
4223
4224 if (isHashMethod || codecIndex != -1)
4225 {
4226 if (!printCallback)
4227 return S_FALSE;
4228 IBenchPrintCallback &f = *printCallback;
4229
4230 UInt64 dict64 = dict;
4231 if (!dictIsDefined)
4232 dict64 = (1 << 27);
4233 if (use_fileData)
4234 {
4235 if (!dictIsDefined)
4236 dict64 = fileDataBuffer.Size();
4237 else if (dict64 > fileDataBuffer.Size())
4238 dict64 = fileDataBuffer.Size();
4239 }
4240
4241 for (;;)
4242 {
4243 const int index = method.FindProp(NCoderPropID::kDictionarySize);
4244 if (index < 0)
4245 break;
4246 method.Props.Delete((unsigned)index);
4247 }
4248
4249 // methodName.RemoveChar(L'-');
4250 Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method
4251 const UInt32 *checkSum = NULL;
4252 int benchIndex = -1;
4253
4254 if (isHashMethod)
4255 {
4256 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
4257 {
4258 const CBenchHash &h = g_Hash[i];
4259 AString benchMethod (h.Name);
4260 AString benchProps;
4261 const int propPos = benchMethod.Find(':');
4262 if (propPos >= 0)
4263 {
4264 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4265 benchMethod.DeleteFrom((unsigned)propPos);
4266 }
4267
4268 if (AreSameMethodNames(benchMethod, methodName))
4269 {
4270 const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4271 /*
4272 bool isMainMethod = method.PropsString.IsEmpty();
4273 if (isMainMethod)
4274 isMainMethod = !checkSum
4275 || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8"));
4276 if (sameProps || isMainMethod)
4277 */
4278 {
4279 complexity = (Int32)h.Complex;
4280 checkSum = &h.CheckSum;
4281 if (sameProps)
4282 break;
4283 /*
4284 if property. is not specified, we use the complexity
4285 for latest fastest method (crc32:64)
4286 */
4287 }
4288 }
4289 }
4290 // if (!checkSum) return E_NOTIMPL;
4291 }
4292 else
4293 {
4294 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4295 {
4296 const CBenchMethod &bench = g_Bench[i];
4297 AString benchMethod (bench.Name);
4298 AString benchProps;
4299 const int propPos = benchMethod.Find(':');
4300 if (propPos >= 0)
4301 {
4302 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4303 benchMethod.DeleteFrom((unsigned)propPos);
4304 }
4305
4306 if (AreSameMethodNames(benchMethod, methodName))
4307 {
4308 const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4309 // bool isMainMethod = method.PropsString.IsEmpty();
4310 // if (sameProps || isMainMethod)
4311 {
4312 benchIndex = (int)i;
4313 if (sameProps)
4314 break;
4315 }
4316 }
4317 }
4318 // if (benchIndex < 0) return E_NOTIMPL;
4319 }
4320
4321 {
4322 /* we count usage only for crc and filter. non-filters are not supported */
4323 UInt64 usage = (1 << 20);
4324 UInt64 bufSize = dict64;
4325 UInt32 numBlocks = isHashMethod ? 1 : 3;
4326 if (use_fileData)
4327 {
4328 usage += fileDataBuffer.Size();
4329 if (bufSize > fileDataBuffer.Size())
4330 bufSize = fileDataBuffer.Size();
4331 if (isHashMethod)
4332 {
4333 numBlocks = 0;
4334 #ifndef Z7_ST
4335 if (numThreadsSpecified != 1)
4336 numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
4337 #endif
4338 }
4339 }
4340 usage += numThreadsSpecified * bufSize * numBlocks;
4341 Print_Usage_and_Threads(f, usage, numThreadsSpecified);
4342 }
4343
4344 CUIntVector numThreadsVector;
4345 {
4346 unsigned nt = numThreads_Start;
4347 for (;;)
4348 {
4349 if (nt > numThreadsSpecified)
4350 break;
4351 numThreadsVector.Add(nt);
4352 const unsigned next = nt * 2;
4353 const UInt32 ntHalf= numThreadsSpecified / 2;
4354 if (ntHalf > nt && ntHalf < next)
4355 numThreadsVector.Add(ntHalf);
4356 if (numThreadsSpecified > nt && numThreadsSpecified < next)
4357 numThreadsVector.Add(numThreadsSpecified);
4358 nt = next;
4359 }
4360 }
4361
4362 unsigned numColumns = isHashMethod ? 1 : 2;
4363 CTempValues speedTotals;
4364 CTempValues usageTotals;
4365 {
4366 const unsigned numItems = numThreadsVector.Size() * numColumns;
4367 speedTotals.Alloc(numItems);
4368 usageTotals.Alloc(numItems);
4369 for (unsigned i = 0; i < numItems; i++)
4370 {
4371 speedTotals.Values[i] = 0;
4372 usageTotals.Values[i] = 0;
4373 }
4374 }
4375
4376 f.NewLine();
4377 for (unsigned line = 0; line < 3; line++)
4378 {
4379 f.NewLine();
4380 f.Print(line == 0 ? "THRD" : line == 1 ? " " : "Size");
4381 FOR_VECTOR (ti, numThreadsVector)
4382 {
4383 if (ti != 0)
4384 Print_Delimiter(f);
4385 if (line == 0)
4386 {
4387 PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1));
4388 PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed);
4389 }
4390 else
4391 {
4392 for (unsigned c = 0; c < numColumns; c++)
4393 {
4394 PrintRight(f, line == 1 ? "Usage" : "%", kFieldSize_Usage + 1);
4395 PrintRight(f, line == 1 ? "BW" : "MB/s", kFieldSize_CrcSpeed + 1);
4396 }
4397 }
4398 }
4399 }
4400 f.NewLine();
4401
4402 UInt64 numSteps = 0;
4403
4404 // for (UInt32 iter = 0; iter < numIterations; iter++)
4405 // {
4406 unsigned pow = 10; // kNumHashDictBits
4407 if (startDicLog_Defined)
4408 pow = startDicLog;
4409
4410 // #define NUM_SUB_BITS 2
4411 // pow <<= NUM_SUB_BITS;
4412 for (;; pow++)
4413 {
4414 const UInt64 bufSize = (UInt64)1 << pow;
4415 // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS);
4416 // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS);
4417
4418 size_t dataSize = fileDataBuffer.Size();
4419 if (dataSize > bufSize || !use_fileData)
4420 dataSize = (size_t)bufSize;
4421
4422 for (UInt32 iter = 0; iter < numIterations; iter++)
4423 {
4424 Print_Pow(f, pow);
4425 // PrintNumber(f, bufSize >> 10, 4);
4426
4427 FOR_VECTOR (ti, numThreadsVector)
4428 {
4429 RINOK(f.CheckBreak())
4430 const UInt32 numThreads = numThreadsVector[ti];
4431 if (isHashMethod)
4432 {
4433 UInt64 speed = 0;
4434 UInt64 usage = 0;
4435 const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
4436 numThreads,
4437 dataSize, (const Byte *)fileDataBuffer,
4438 speed, usage,
4439 (UInt32)complexity,
4440 1, // benchWeight,
4441 (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
4442 method,
4443 &f,
4444 #ifndef Z7_ST
4445 &affinityMode,
4446 #endif
4447 false, // showRating
4448 NULL, false, 0);
4449 RINOK(res)
4450
4451 if (ti != 0)
4452 Print_Delimiter(f);
4453
4454 Bench_BW_Print_Usage_Speed(f, usage, speed);
4455 speedTotals.Values[ti] += speed;
4456 usageTotals.Values[ti] += usage;
4457 }
4458 else
4459 {
4460 {
4461 unsigned keySize = 32;
4462 if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16;
4463 else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24;
4464 callback.BenchProps.KeySize = keySize;
4465 }
4466
4467 COneMethodInfo method2 = method;
4468 unsigned bench_DictBits;
4469
4470 if (benchIndex >= 0)
4471 {
4472 const CBenchMethod &bench = g_Bench[benchIndex];
4473 callback.BenchProps.EncComplex = bench.EncComplex;
4474 callback.BenchProps.DecComplexUnc = bench.DecComplexUnc;
4475 callback.BenchProps.DecComplexCompr = bench.DecComplexCompr;
4476 bench_DictBits = bench.DictBits;
4477 // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug
4478 }
4479 else
4480 {
4481 bench_DictBits = kOldLzmaDictBits; // = 32 default
4482 if (isFilter)
4483 {
4484 const unsigned k_UnknownCoderComplexity = 4;
4485 callback.BenchProps.EncComplex = k_UnknownCoderComplexity;
4486 callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity;
4487 }
4488 else
4489 {
4490 callback.BenchProps.EncComplex = 1 << 10;
4491 callback.BenchProps.DecComplexUnc = 1 << 6;
4492 }
4493 callback.BenchProps.DecComplexCompr = 0;
4494 }
4495 callback.NeedPrint = false;
4496
4497 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4498 {
4499 const NCOM::CPropVariant propVariant = (UInt32)pow;
4500 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4501 }
4502
4503 const HRESULT res = MethodBench(
4504 EXTERNAL_CODECS_LOC_VARS
4505 complexInCommands,
4506 #ifndef Z7_ST
4507 false, // oldLzmaBenchMode
4508 numThreadsVector[ti],
4509 &affinityMode,
4510 #endif
4511 method2,
4512 dataSize, (const Byte *)fileDataBuffer,
4513 bench_DictBits,
4514 printCallback,
4515 &callback,
4516 &callback.BenchProps);
4517 RINOK(res)
4518
4519 if (ti != 0)
4520 Print_Delimiter(f);
4521
4522 for (unsigned i = 0; i < 2; i++)
4523 {
4524 const CBenchInfo &bi = callback.BenchInfo_Results[i];
4525 const UInt64 usage = bi.GetUsage();
4526 const UInt64 speed = bi.GetUnpackSizeSpeed();
4527 usageTotals.Values[ti * 2 + i] += usage;
4528 speedTotals.Values[ti * 2 + i] += speed;
4529 Bench_BW_Print_Usage_Speed(f, usage, speed);
4530 }
4531 }
4532 }
4533
4534 f.NewLine();
4535 numSteps++;
4536 }
4537 if (dataSize >= dict64)
4538 break;
4539 }
4540
4541 if (numSteps != 0)
4542 {
4543 f.Print("Avg:");
4544 for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++)
4545 {
4546 if (ti != 0)
4547 Print_Delimiter(f);
4548 for (unsigned i = 0; i < numColumns; i++)
4549 Bench_BW_Print_Usage_Speed(f,
4550 usageTotals.Values[ti * numColumns + i] / numSteps,
4551 speedTotals.Values[ti * numColumns + i] / numSteps);
4552 }
4553 f.NewLine();
4554 }
4555
4556 return S_OK;
4557 }
4558
4559 bool use2Columns = false;
4560
4561 bool totalBenchMode = false;
4562 bool onlyHashBench = false;
4563 if (methodName.IsEqualTo_Ascii_NoCase("hash"))
4564 {
4565 onlyHashBench = true;
4566 methodName = "*";
4567 totalBenchMode = true;
4568 }
4569 else if (methodName.Find('*') >= 0)
4570 totalBenchMode = true;
4571
4572 // ---------- Threads loop ----------
4573 for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
4574 {
4575
4576 UInt32 numThreads = numThreadsSpecified;
4577
4578 if (!multiThreadTests)
4579 {
4580 if (threadsPassIndex != 0)
4581 break;
4582 }
4583 else
4584 {
4585 numThreads = 1;
4586 if (threadsPassIndex != 0)
4587 {
4588 if (numCPUs < 2)
4589 break;
4590 numThreads = numCPUs;
4591 if (threadsPassIndex == 1)
4592 {
4593 if (numCPUs >= 4)
4594 numThreads = numCPUs / 2;
4595 }
4596 else if (numCPUs < 4)
4597 break;
4598 }
4599 }
4600
4601 IBenchPrintCallback &f = *printCallback;
4602
4603 if (threadsPassIndex > 0)
4604 {
4605 f.NewLine();
4606 f.NewLine();
4607 }
4608
4609 if (!dictIsDefined && !onlyHashBench)
4610 {
4611 // we use dicSizeLog and dicSizeLog_Main for data size.
4612 // also we use it to reduce dictionary size of LZMA encoder via NCoderPropID::kReduceSize.
4613 const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
4614 unsigned dicSizeLog = dicSizeLog_Main;
4615
4616 #ifdef UNDER_CE
4617 dicSizeLog = (UInt64)1 << 20;
4618 #endif
4619
4620 if (ramSize_Defined)
4621 for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
4622 if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
4623 break;
4624
4625 dict = (UInt64)1 << dicSizeLog;
4626
4627 if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
4628 {
4629 f.Print("Dictionary reduced to: ");
4630 PrintNumber(f, dicSizeLog, 1);
4631 f.NewLine();
4632 }
4633 }
4634
4635 Print_Usage_and_Threads(f,
4636 onlyHashBench ?
4637 GetBenchMemoryUsage_Hash(numThreads, dict) :
4638 GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode),
4639 numThreads);
4640
4641 f.NewLine();
4642
4643 f.NewLine();
4644
4645 if (totalBenchMode)
4646 {
4647 callback.NameFieldSize = kFieldSize_Name;
4648 use2Columns = false;
4649 }
4650 else
4651 {
4652 callback.NameFieldSize = kFieldSize_SmallName;
4653 use2Columns = true;
4654 }
4655 callback.Use2Columns = use2Columns;
4656
4657 bool showFreq = false;
4658 UInt64 cpuFreq = 0;
4659
4660 if (totalBenchMode)
4661 {
4662 showFreq = true;
4663 }
4664
4665 unsigned fileldSize = kFieldSize_TotalSize;
4666 if (showFreq)
4667 fileldSize += kFieldSize_EUAndEffec;
4668
4669 if (use2Columns)
4670 {
4671 PrintSpaces(f, callback.NameFieldSize);
4672 PrintRight(f, "Compressing", fileldSize);
4673 f.Print(kSep);
4674 PrintRight(f, "Decompressing", fileldSize);
4675 }
4676 f.NewLine();
4677 PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
4678
4679 int j;
4680
4681 for (j = 0; j < 2; j++)
4682 {
4683 PrintRight(f, "Speed", kFieldSize_Speed + 1);
4684 PrintRight(f, "Usage", kFieldSize_Usage + 1);
4685 PrintRight(f, "R/U", kFieldSize_RU + 1);
4686 PrintRight(f, "Rating", kFieldSize_Rating + 1);
4687 if (showFreq)
4688 {
4689 PrintRight(f, "E/U", kFieldSize_EU + 1);
4690 PrintRight(f, "Effec", kFieldSize_Effec + 1);
4691 }
4692 if (!use2Columns)
4693 break;
4694 if (j == 0)
4695 f.Print(kSep);
4696 }
4697
4698 f.NewLine();
4699 PrintSpaces(f, callback.NameFieldSize);
4700
4701 for (j = 0; j < 2; j++)
4702 {
4703 PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
4704 PrintRight(f, "%", kFieldSize_Usage + 1);
4705 PrintRight(f, "MIPS", kFieldSize_RU + 1);
4706 PrintRight(f, "MIPS", kFieldSize_Rating + 1);
4707 if (showFreq)
4708 {
4709 PrintRight(f, "%", kFieldSize_EU + 1);
4710 PrintRight(f, "%", kFieldSize_Effec + 1);
4711 }
4712 if (!use2Columns)
4713 break;
4714 if (j == 0)
4715 f.Print(kSep);
4716 }
4717
4718 f.NewLine();
4719 f.NewLine();
4720
4721 if (specifiedFreq != 0)
4722 cpuFreq = specifiedFreq;
4723
4724 // bool showTotalSpeed = false;
4725
4726 if (totalBenchMode)
4727 {
4728 for (UInt32 i = 0; i < numIterations; i++)
4729 {
4730 if (i != 0)
4731 printCallback->NewLine();
4732
4733 const unsigned kNumCpuTests = 3;
4734 for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
4735 {
4736 PrintLeft(f, "CPU", kFieldSize_Name);
4737
4738 // UInt32 resVal;
4739
4740 CFreqBench fb;
4741 fb.complexInCommands = complexInCommands;
4742 fb.numThreads = numThreads;
4743 // showFreq;
4744 fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4745 fb.specifiedFreq = specifiedFreq;
4746
4747 const HRESULT res = fb.FreqBench(printCallback
4748 #ifndef Z7_ST
4749 , &affinityMode
4750 #endif
4751 );
4752 RINOK(res)
4753
4754 cpuFreq = fb.CpuFreqRes;
4755 callback.NewLine();
4756
4757 if (specifiedFreq != 0)
4758 cpuFreq = specifiedFreq;
4759
4760 if (testTimeMs >= 1000)
4761 if (freqTest == kNumCpuTests - 1)
4762 {
4763 // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands);
4764 }
4765 }
4766 callback.NewLine();
4767
4768 // return S_OK; // change it
4769
4770 callback.SetFreq(true, cpuFreq);
4771
4772 if (!onlyHashBench)
4773 {
4774 size_t dataSize = (size_t)dict;
4775 if (use_fileData)
4776 {
4777 dataSize = fileDataBuffer.Size();
4778 if (dictIsDefined && dataSize > dict)
4779 dataSize = (size_t)dict;
4780 }
4781
4782 const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
4783 method, complexInCommands,
4784 #ifndef Z7_ST
4785 numThreads,
4786 &affinityMode,
4787 #endif
4788 dictIsDefined || use_fileData, // forceUnpackSize
4789 dataSize,
4790 (const Byte *)fileDataBuffer,
4791 printCallback, &callback);
4792 RINOK(res)
4793 }
4794
4795 {
4796 size_t dataSize = (size_t)1 << kNumHashDictBits;
4797 if (dictIsDefined)
4798 {
4799 dataSize = (size_t)dict;
4800 if (dataSize != dict)
4801 return E_OUTOFMEMORY;
4802 }
4803 if (use_fileData)
4804 {
4805 dataSize = fileDataBuffer.Size();
4806 if (dictIsDefined && dataSize > dict)
4807 dataSize = (size_t)dict;
4808 }
4809
4810 const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS
4811 method, complexInCommands,
4812 numThreads,
4813 dataSize, (const Byte *)fileDataBuffer,
4814 printCallback, &callback,
4815 #ifndef Z7_ST
4816 &affinityMode,
4817 #endif
4818 &callback.EncodeRes, true, cpuFreq);
4819 RINOK(res)
4820 }
4821
4822 callback.NewLine();
4823 {
4824 PrintLeft(f, "CPU", kFieldSize_Name);
4825
4826 CFreqBench fb;
4827 fb.complexInCommands = complexInCommands;
4828 fb.numThreads = numThreads;
4829 // showFreq;
4830 fb.showFreq = (specifiedFreq != 0);
4831 fb.specifiedFreq = specifiedFreq;
4832
4833 const HRESULT res = fb.FreqBench(printCallback
4834 #ifndef Z7_ST
4835 , &affinityMode
4836 #endif
4837 );
4838 RINOK(res)
4839 callback.NewLine();
4840 }
4841 }
4842 }
4843 else
4844 {
4845 needSetComplexity = true;
4846 if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
4847 {
4848 unsigned i;
4849 for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4850 {
4851 const CBenchMethod &h = g_Bench[i];
4852 AString benchMethod (h.Name);
4853 AString benchProps;
4854 const int propPos = benchMethod.Find(':');
4855 if (propPos >= 0)
4856 {
4857 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4858 benchMethod.DeleteFrom((unsigned)propPos);
4859 }
4860
4861 if (AreSameMethodNames(benchMethod, methodName))
4862 {
4863 if (benchProps.IsEmpty()
4864 || (benchProps == "x5" && method.PropsString.IsEmpty())
4865 || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
4866 {
4867 callback.BenchProps.EncComplex = h.EncComplex;
4868 callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
4869 callback.BenchProps.DecComplexUnc = h.DecComplexUnc;
4870 needSetComplexity = false;
4871 break;
4872 }
4873 }
4874 }
4875 /*
4876 if (i == Z7_ARRAY_SIZE(g_Bench))
4877 return E_NOTIMPL;
4878 */
4879 }
4880 if (needSetComplexity)
4881 callback.BenchProps.SetLzmaCompexity();
4882
4883 if (startDicLog < kBenchMinDicLogSize)
4884 startDicLog = kBenchMinDicLogSize;
4885
4886 for (unsigned i = 0; i < numIterations; i++)
4887 {
4888 unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
4889 if (!multiDict)
4890 pow = 32;
4891 while (GetDictSizeFromLog(pow) > dict && pow > 0)
4892 pow--;
4893 for (; GetDictSizeFromLog(pow) <= dict; pow++)
4894 {
4895 Print_Pow(f, pow);
4896 callback.DictSize = (UInt64)1 << pow;
4897
4898 COneMethodInfo method2 = method;
4899
4900 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4901 {
4902 // We add dictionary size property.
4903 // method2 can have two different dictionary size properties.
4904 // And last property is main.
4905 NCOM::CPropVariant propVariant = (UInt32)pow;
4906 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4907 }
4908
4909 size_t uncompressedDataSize;
4910 if (use_fileData)
4911 {
4912 uncompressedDataSize = fileDataBuffer.Size();
4913 }
4914 else
4915 {
4916 uncompressedDataSize = (size_t)callback.DictSize;
4917 if (uncompressedDataSize != callback.DictSize)
4918 return E_OUTOFMEMORY;
4919 if (uncompressedDataSize >= (1 << 18))
4920 uncompressedDataSize += kAdditionalSize;
4921 }
4922
4923 const HRESULT res = MethodBench(
4924 EXTERNAL_CODECS_LOC_VARS
4925 complexInCommands,
4926 #ifndef Z7_ST
4927 true, numThreads,
4928 &affinityMode,
4929 #endif
4930 method2,
4931 uncompressedDataSize, (const Byte *)fileDataBuffer,
4932 kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
4933 f.NewLine();
4934 RINOK(res)
4935 if (!multiDict)
4936 break;
4937 }
4938 }
4939 }
4940
4941 PrintChars(f, '-', callback.NameFieldSize + fileldSize);
4942
4943 if (use2Columns)
4944 {
4945 f.Print(kSep);
4946 PrintChars(f, '-', fileldSize);
4947 }
4948
4949 f.NewLine();
4950
4951 if (use2Columns)
4952 {
4953 PrintLeft(f, "Avr:", callback.NameFieldSize);
4954 PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
4955 f.Print(kSep);
4956 PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
4957 f.NewLine();
4958 }
4959
4960 PrintLeft(f, "Tot:", callback.NameFieldSize);
4961 CTotalBenchRes midRes;
4962 midRes = callback.EncodeRes;
4963 midRes.Update_With_Res(callback.DecodeRes);
4964
4965 // midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
4966 PrintTotals(f, showFreq, cpuFreq, false, midRes);
4967 f.NewLine();
4968
4969 }
4970 return S_OK;
4971 }
4972