xref: /aosp_15_r20/external/lz4/examples/bench_functions.c (revision 27162e4e17433d5aa7cb38e7b6a433a09405fc7f)
1 /*
2  * bench_functions.c
3  * Copyright  : Kyle Harper
4  * License    : Follows same licensing as the lz4.c/lz4.h program at any given time.  Currently, BSD 2.
5  * Description: A program to demonstrate the various compression functions involved when using LZ4_compress_default().  The idea
6  *              is to show how each step in the call stack can be used directly, if desired.  There is also some benchmarking for
7  *              each function to demonstrate the (probably lack of) performance difference when jumping the stack.
8  *              (If you're new to lz4, please read simple_buffer.c to understand the fundamentals)
9  *
10  *              The call stack (before theoretical compiler optimizations) for LZ4_compress_default is as follows:
11  *                LZ4_compress_default
12  *                  LZ4_compress_fast
13  *                    LZ4_compress_fast_extState
14  *                      LZ4_compress_generic
15  *
16  *              LZ4_compress_default()
17  *                This is the recommended function for compressing data.  It will serve as the baseline for comparison.
18  *              LZ4_compress_fast()
19  *                Despite its name, it's not a "fast" version of compression.  It simply decides if HEAPMODE is set and either
20  *                allocates memory on the heap for a struct or creates the struct directly on the stack.  Stack access is generally
21  *                faster but this function itself isn't giving that advantage, it's just some logic for compile time.
22  *              LZ4_compress_fast_extState()
23  *                This simply accepts all the pointers and values collected thus far and adds logic to determine how
24  *                LZ4_compress_generic should be invoked; specifically: can the source fit into a single pass as determined by
25  *                LZ4_64Klimit.
26  *              LZ4_compress_generic()
27  *                As the name suggests, this is the generic function that ultimately does most of the heavy lifting.  Calling this
28  *                directly can help avoid some test cases and branching which might be useful in some implementation-specific
29  *                situations, but you really need to know what you're doing AND what you're asking lz4 to do!  You also need a
30  *                wrapper function because this function isn't exposed with lz4.h.
31  *
32  *              The call stack for decompression functions is shallow.  There are 2 options:
33  *                LZ4_decompress_safe  ||  LZ4_decompress_fast
34  *                  LZ4_decompress_generic
35  *
36  *               LZ4_decompress_safe
37  *                 This is the recommended function for decompressing data.  It is considered safe because the caller specifies
38  *                 both the size of the compressed buffer to read as well as the maximum size of the output (decompressed) buffer
39  *                 instead of just the latter.
40  *               LZ4_decompress_fast
41  *                 Again, despite its name it's not a "fast" version of decompression.  It simply frees the caller from sending the
42  *                 size of the compressed buffer (it will simply be read-to-end, hence its lack of safety).
43  *               LZ4_decompress_generic
44  *                 This is the generic function that both of the LZ4_decompress_* functions above end up calling.  Calling this
45  *                 directly is not advised, period.  Furthermore, it is a static inline function in lz4.c, so there isn't a symbol
46  *                 exposed for anyone using lz4.h to utilize.
47  *
48  *               Special Note About Decompression:
49  *               Using the LZ4_decompress_safe() function protects against malicious (user) input.  If you are using data from a
50  *               trusted source, or if your program is the producer (P) as well as its consumer (C) in a PC or MPMC setup, you can
51  *               safely use the LZ4_decompress_fast function.
52  */
53 
54 /* Since lz4 compiles with c99 and not gnu/std99 we need to enable POSIX linking for time.h structs and functions. */
55 #if __STDC_VERSION__ >= 199901L
56 #define _XOPEN_SOURCE 600
57 #else
58 #define _XOPEN_SOURCE 500
59 #endif
60 #define _POSIX_C_SOURCE 199309L
61 
62 /* Includes, for Power! */
63 #define LZ4_DISABLE_DEPRECATE_WARNINGS   /* LZ4_decompress_fast */
64 #include "lz4.h"
65 #include <stdio.h>    /* for printf() */
66 #include <stdlib.h>   /* for exit() */
67 #include <string.h>   /* for atoi() memcmp() */
68 #include <stdint.h>   /* for uint_types */
69 #include <inttypes.h> /* for PRIu64 */
70 #include <time.h>     /* for clock() */
71 #include <locale.h>   /* for setlocale() */
72 #include <limits.h>   /* for INT_MAX */
73 #include <assert.h>
74 
75 /* We need to know what one billion is for clock timing. */
76 #define BILLION 1000000000L
77 
78 /* Create a crude set of test IDs so we can switch on them later  (Can't switch() on a char[] or char*). */
79 #define ID__LZ4_COMPRESS_DEFAULT        1
80 #define ID__LZ4_COMPRESS_FAST           2
81 #define ID__LZ4_COMPRESS_FAST_EXTSTATE  3
82 #define ID__LZ4_COMPRESS_GENERIC        4
83 #define ID__LZ4_DECOMPRESS_SAFE         5
84 #define ID__LZ4_DECOMPRESS_FAST         6
85 
86 
/*
 * Show-error-and-bail helper.
 * Writes `message` (followed by a space and a newline) to stdout, then
 * terminates the process with `code` as its exit status.  Never returns.
 */
void run_screaming(const char *message, const int code) {
  fprintf(stdout, "%s \n", message);
  exit(code);
}
94 
/*
 * Prints the command-line synopsis for `exeName`, then hands `message` to
 * run_screaming() which reports it and exits with status 1.
 * Kept separate so main() stays short.
 */
void usage(const char* exeName, const char* message) {
  fprintf(stdout, "Usage: %s <iterations> \n", exeName);
  /* run_screaming() calls exit(), so control never comes back here. */
  run_screaming(message, 1);
}
103 
/*
 * CHECK(c): if condition `c` is false, bail out through run_screaming() with
 * the stringified condition as the message and 1 as the exit status.
 * Wrapped in do { } while (0) so that `CHECK(x);` is exactly one statement and
 * stays correct inside an unbraced if/else.
 */
#define CHECK(c) do { if (!(c)) { run_screaming(#c, 1); } } while (0)
105 
106 
107 /*
108  * Runs the benchmark for LZ4_* function based on function_id.
109  * @return : benchmark duration, in ns
110  */
bench(const char * known_good_dst,const int function_id,int iterations,const char * src,char * dst,const size_t src_size,const size_t max_dst_size,const size_t comp_size)111 uint64_t bench(
112     const char *known_good_dst,
113     const int function_id,
114     int iterations,
115     const char *src,
116     char *dst,
117     const size_t src_size,
118     const size_t max_dst_size,
119     const size_t comp_size
120   ) {
121   int rv = 0;
122   const int warm_up = 5000;
123   const int acceleration = 1;
124   LZ4_stream_t state;
125   clock_t start = clock();
126 
127   // Select the right function to perform the benchmark on.  We perform 5000 initial loops to warm the cache and ensure that dst
128   // remains matching to known_good_dst between successive calls.
129   switch(function_id) {
130     case ID__LZ4_COMPRESS_DEFAULT:
131       printf("Starting benchmark for function: LZ4_compress_default()\n");
132       for(int junk=0; junk<warm_up; junk++)
133         rv = LZ4_compress_default(src, dst, (int)src_size, (int)max_dst_size);
134       if (rv < 1)
135         run_screaming("Couldn't run LZ4_compress_default()... error code received is in exit code.", rv);
136       if (memcmp(known_good_dst, dst, max_dst_size) != 0)
137         run_screaming("According to memcmp(), the compressed dst we got doesn't match the known_good_dst... ruh roh.", 1);
138       start = clock();
139       for (int i=1; i<=iterations; i++)
140         LZ4_compress_default(src, dst, (int)src_size, (int)max_dst_size);
141       break;
142 
143     case ID__LZ4_COMPRESS_FAST:
144       printf("Starting benchmark for function: LZ4_compress_fast()\n");
145       for(int junk=0; junk<warm_up; junk++)
146         rv = LZ4_compress_fast(src, dst, (int)src_size, (int)max_dst_size, acceleration);
147       if (rv < 1)
148         run_screaming("Couldn't run LZ4_compress_fast()... error code received is in exit code.", rv);
149       if (memcmp(known_good_dst, dst, max_dst_size) != 0)
150         run_screaming("According to memcmp(), the compressed dst we got doesn't match the known_good_dst... ruh roh.", 1);
151       start = clock();
152       for (int i=1; i<=iterations; i++)
153         LZ4_compress_fast(src, dst, (int)src_size, (int)max_dst_size, acceleration);
154       break;
155 
156     case ID__LZ4_COMPRESS_FAST_EXTSTATE:
157       printf("Starting benchmark for function: LZ4_compress_fast_extState()\n");
158       for(int junk=0; junk<warm_up; junk++)
159         rv = LZ4_compress_fast_extState(&state, src, dst, (int)src_size, (int)max_dst_size, acceleration);
160       if (rv < 1)
161         run_screaming("Couldn't run LZ4_compress_fast_extState()... error code received is in exit code.", rv);
162       if (memcmp(known_good_dst, dst, max_dst_size) != 0)
163         run_screaming("According to memcmp(), the compressed dst we got doesn't match the known_good_dst... ruh roh.", 1);
164       start = clock();
165       for (int i=1; i<=iterations; i++)
166         LZ4_compress_fast_extState(&state, src, dst, (int)src_size, (int)max_dst_size, acceleration);
167       break;
168 
169 //    Disabled until LZ4_compress_generic() is exposed in the header.
170 //    case ID__LZ4_COMPRESS_GENERIC:
171 //      printf("Starting benchmark for function: LZ4_compress_generic()\n");
172 //      LZ4_resetStream((LZ4_stream_t*)&state);
173 //      for(int junk=0; junk<warm_up; junk++) {
174 //        LZ4_resetStream((LZ4_stream_t*)&state);
175 //        //rv = LZ4_compress_generic_wrapper(&state, src, dst, src_size, max_dst_size, notLimited, byU16, noDict, noDictIssue, acceleration);
176 //        LZ4_compress_generic_wrapper(&state, src, dst, src_size, max_dst_size, acceleration);
177 //      }
178 //      if (rv < 1)
179 //        run_screaming("Couldn't run LZ4_compress_generic()... error code received is in exit code.", rv);
180 //      if (memcmp(known_good_dst, dst, max_dst_size) != 0)
181 //        run_screaming("According to memcmp(), the compressed dst we got doesn't match the known_good_dst... ruh roh.", 1);
182 //      for (int i=1; i<=iterations; i++) {
183 //        LZ4_resetStream((LZ4_stream_t*)&state);
184 //        //LZ4_compress_generic_wrapper(&state, src, dst, src_size, max_dst_size, notLimited, byU16, noDict, noDictIssue, acceleration);
185 //        LZ4_compress_generic_wrapper(&state, src, dst, src_size, max_dst_size, acceleration);
186 //      }
187 //      break;
188 
189     case ID__LZ4_DECOMPRESS_SAFE:
190       printf("Starting benchmark for function: LZ4_decompress_safe()\n");
191       for(int junk=0; junk<warm_up; junk++)
192         rv = LZ4_decompress_safe(src, dst, (int)comp_size, (int)src_size);
193       if (rv < 1)
194         run_screaming("Couldn't run LZ4_decompress_safe()... error code received is in exit code.", rv);
195       if (memcmp(known_good_dst, dst, src_size) != 0)
196         run_screaming("According to memcmp(), the compressed dst we got doesn't match the known_good_dst... ruh roh.", 1);
197       start = clock();
198       for (int i=1; i<=iterations; i++)
199         LZ4_decompress_safe(src, dst, (int)comp_size, (int)src_size);
200       break;
201 
202     case ID__LZ4_DECOMPRESS_FAST:
203       printf("Starting benchmark for function: LZ4_decompress_fast()\n");
204       for(int junk=0; junk<warm_up; junk++)
205         rv = LZ4_decompress_fast(src, dst, (int)src_size);
206       if (rv < 1)
207         run_screaming("Couldn't run LZ4_decompress_fast()... error code received is in exit code.", rv);
208       if (memcmp(known_good_dst, dst, src_size) != 0)
209         run_screaming("According to memcmp(), the compressed dst we got doesn't match the known_good_dst... ruh roh.", 1);
210       start = clock();
211       for (int i=1; i<=iterations; i++)
212         LZ4_decompress_fast(src, dst, (int)src_size);
213       break;
214 
215     default:
216       run_screaming("The test specified isn't valid.  Please check your code.", 1);
217       break;
218   }
219 
220   { clock_t end = clock();
221 
222     // Low resolution timer => requires more iterations to measure something
223     if (end == start) {
224       assert(iterations < (INT_MAX / 10));
225       iterations *= 10;
226       printf("not enough iterations => increase nb of iterations to %i \n", iterations);
227       return bench(known_good_dst, function_id, iterations, src, dst, src_size, max_dst_size, comp_size);
228     }
229 
230     return (uint64_t)((double)(end - start) / CLOCKS_PER_SEC * BILLION);
231   }
232 }
233 
234 
235 /*
236  * main()
237  * We will demonstrate the use of each function for simplicity sake.  Then we will run 2 suites of benchmarking:
238  * Test suite A)  Uses generic Lorem Ipsum text which should be generally compressible insomuch as basic human text is
239  *                compressible for such a small src_size
240  * Test Suite B)  For the sake of testing, see what results we get if the data is drastically easier to compress.  IF there are
241  *                indeed losses and IF more compressible data is faster to process, this will exacerbate the findings.
242  */
main(int argc,char ** argv)243 int main(int argc, char **argv) {
244   // Get and verify options.  There's really only 1:  How many iterations to run.
245   const char* exeName = argv[0];
246   int iterations = 1000000;
247   if (argc > 1)
248     iterations = atoi(argv[1]);
249   if (iterations < 1)
250     usage(exeName, "Argument 1 (iterations) must be > 0.");
251 
252   // First we will create 2 sources (char *) of 2000 bytes each.  One normal text, the other highly-compressible text.
253   const char src[]    = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed luctus purus et risus vulputate, et mollis orci ullamcorper. Nulla facilisi. Fusce in ligula sed purus varius aliquet interdum vitae justo. Proin quis diam velit. Nulla varius iaculis auctor. Cras volutpat, justo eu dictum pulvinar, elit sem porttitor metus, et imperdiet metus sapien et ante. Nullam nisi nulla, ornare eu tristique eu, dignissim vitae diam. Nulla sagittis porta libero, a accumsan felis sagittis scelerisque.  Integer laoreet eleifend congue. Etiam rhoncus leo vel dolor fermentum, quis luctus nisl iaculis. Praesent a erat sapien. Aliquam semper mi in lorem ultrices ultricies. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In feugiat risus sed enim ultrices, at sodales nulla tristique. Maecenas eget pellentesque justo, sed pellentesque lectus. Fusce sagittis sit amet elit vel varius. Donec sed ligula nec ligula vulputate rutrum sed ut lectus. Etiam congue pharetra leo vitae cursus. Morbi enim ante, porttitor ut varius vel, tincidunt quis justo. Nunc iaculis, risus id ultrices semper, metus est efficitur ligula, vel posuere risus nunc eget purus. Ut lorem turpis, condimentum at sem sed, porta aliquam turpis. In ut sapien a nulla dictum tincidunt quis sit amet lorem. Fusce at est egestas, luctus neque eu, consectetur tortor. Phasellus eleifend ultricies nulla ac lobortis.  Morbi maximus quam cursus vehicula iaculis. Maecenas cursus vel justo ut rutrum. Curabitur magna orci, dignissim eget dapibus vitae, finibus id lacus. Praesent rhoncus mattis augue vitae bibendum. Praesent porta mauris non ultrices fermentum. Quisque vulputate ipsum in sodales pulvinar. Aliquam nec mollis felis. Donec vitae augue pulvinar, congue nisl sed, pretium purus. Fusce lobortis mi ac neque scelerisque semper. Pellentesque vel est vitae magna aliquet aliquet. Nam non dolor. Nulla facilisi. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. 
Morbi ac lacinia felis metus.";
254   const char hc_src[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaa";
255   // Set and derive sizes.  Since we're using strings, use strlen() + 1 for \0.
256   const size_t src_size = strlen(src) + 1;
257   const size_t max_dst_size = (size_t)LZ4_compressBound((int)src_size);
258   int bytes_returned = 0;
259   // Now build allocations for the data we'll be playing with.
260   char *dst               = (char*)calloc(1, max_dst_size);
261   char *known_good_dst    = (char*)calloc(1, max_dst_size);
262   char *known_good_hc_dst = (char*)calloc(1, max_dst_size);
263   if (dst == NULL || known_good_dst == NULL || known_good_hc_dst == NULL)
264     run_screaming("Couldn't allocate memory for the destination buffers.  Sad :(", 1);
265 
266   // Create known-good buffers to verify our tests with other functions will produce the same results.
267   bytes_returned = LZ4_compress_default(src, known_good_dst, (int)src_size, (int)max_dst_size);
268   if (bytes_returned < 1)
269     run_screaming("Couldn't create a known-good destination buffer for comparison... this is bad.", 1);
270   const size_t src_comp_size = bytes_returned;
271   bytes_returned = LZ4_compress_default(hc_src, known_good_hc_dst, (int)src_size, (int)max_dst_size);
272   if (bytes_returned < 1)
273     run_screaming("Couldn't create a known-good (highly compressible) destination buffer for comparison... this is bad.", 1);
274   const size_t hc_src_comp_size = bytes_returned;
275 
276 
277   /* LZ4_compress_default() */
278   // This is the default function so we don't need to demonstrate how to use it.  See basics.c if you need more basal information.
279 
280   /* LZ4_compress_fast() */
281   // Using this function is identical to LZ4_compress_default except we need to specify an "acceleration" value.  Defaults to 1.
282   memset(dst, 0, max_dst_size);
283   bytes_returned = LZ4_compress_fast(src, dst, (int)src_size, (int)max_dst_size, 1);
284   if (bytes_returned < 1)
285     run_screaming("Failed to compress src using LZ4_compress_fast.  echo $? for return code.", bytes_returned);
286   if (memcmp(dst, known_good_dst, bytes_returned) != 0)
287     run_screaming("According to memcmp(), the value we got in dst from LZ4_compress_fast doesn't match the known-good value.  This is bad.", 1);
288 
289   /* LZ4_compress_fast_extState() */
290   // Using this function directly requires that we build an LZ4_stream_t struct ourselves.  We do NOT have to reset it ourselves.
291   memset(dst, 0, max_dst_size);
292   LZ4_stream_t state;
293   bytes_returned = LZ4_compress_fast_extState(&state, src, dst, (int)src_size, (int)max_dst_size, 1);
294   if (bytes_returned < 1)
295     run_screaming("Failed to compress src using LZ4_compress_fast_extState.  echo $? for return code.", bytes_returned);
296   if (memcmp(dst, known_good_dst, bytes_returned) != 0)
297     run_screaming("According to memcmp(), the value we got in dst from LZ4_compress_fast_extState doesn't match the known-good value.  This is bad.", 1);
298 
299   /* LZ4_compress_generic */
300   // When you can exactly control the inputs and options of your LZ4 needs, you can use LZ4_compress_generic and fixed (const)
301   // values for the enum types such as dictionary and limitations.  Any other direct-use is probably a bad idea.
302   //
303   // That said, the LZ4_compress_generic() function is 'static inline' and does not have a prototype in lz4.h to expose a symbol
304   // for it.  In other words: we can't access it directly.  I don't want to submit a PR that modifies lz4.c/h.  Yann and others can
305   // do that if they feel it's worth expanding this example.
306   //
307   // I will, however, leave a skeleton of what would be required to use it directly:
308   /*
309     memset(dst, 0, max_dst_size);
310     // LZ4_stream_t state:  is already declared above.  We can reuse it BUT we have to reset the stream ourselves between each call.
311     LZ4_resetStream((LZ4_stream_t *)&state);
312     // Since src size is small we know the following enums will be used:  notLimited (0), byU16 (2), noDict (0), noDictIssue (0).
313     bytes_returned = LZ4_compress_generic(&state, src, dst, src_size, max_dst_size, notLimited, byU16, noDict, noDictIssue, 1);
314     if (bytes_returned < 1)
315       run_screaming("Failed to compress src using LZ4_compress_generic.  echo $? for return code.", bytes_returned);
316     if (memcmp(dst, known_good_dst, bytes_returned) != 0)
317       run_screaming("According to memcmp(), the value we got in dst from LZ4_compress_generic doesn't match the known-good value.  This is bad.", 1);
318   */
319 
320 
321   /* Benchmarking */
322   /* Now we'll run a few rudimentary benchmarks with each function to demonstrate differences in speed based on the function used.
323    * Remember, we cannot call LZ4_compress_generic() directly (yet) so it's disabled.
324    */
325   // Suite A - Normal Compressibility
326   char *dst_d = (char*)calloc(1, src_size);
327   CHECK(dst_d!=NULL);
328   memset(dst, 0, max_dst_size);
329   printf("\nStarting suite A:  Normal compressible text.\n");
330   uint64_t time_taken__default       = bench(known_good_dst, ID__LZ4_COMPRESS_DEFAULT,       iterations, src,            dst,   src_size, max_dst_size, src_comp_size);
331   uint64_t time_taken__fast          = bench(known_good_dst, ID__LZ4_COMPRESS_FAST,          iterations, src,            dst,   src_size, max_dst_size, src_comp_size);
332   uint64_t time_taken__fast_extstate = bench(known_good_dst, ID__LZ4_COMPRESS_FAST_EXTSTATE, iterations, src,            dst,   src_size, max_dst_size, src_comp_size);
333   //uint64_t time_taken__generic       = bench(known_good_dst, ID__LZ4_COMPRESS_GENERIC,       iterations, src,            dst,   src_size, max_dst_size, src_comp_size);
334   uint64_t time_taken__decomp_safe   = bench(src,            ID__LZ4_DECOMPRESS_SAFE,        iterations, known_good_dst, dst_d, src_size, max_dst_size, src_comp_size);
335   uint64_t time_taken__decomp_fast   = bench(src,            ID__LZ4_DECOMPRESS_FAST,        iterations, known_good_dst, dst_d, src_size, max_dst_size, src_comp_size);
336   // Suite B - Highly Compressible
337   memset(dst, 0, max_dst_size);
338   printf("\nStarting suite B:  Highly compressible text.\n");
339   uint64_t time_taken_hc__default       = bench(known_good_hc_dst, ID__LZ4_COMPRESS_DEFAULT,       iterations, hc_src,            dst,   src_size, max_dst_size, hc_src_comp_size);
340   uint64_t time_taken_hc__fast          = bench(known_good_hc_dst, ID__LZ4_COMPRESS_FAST,          iterations, hc_src,            dst,   src_size, max_dst_size, hc_src_comp_size);
341   uint64_t time_taken_hc__fast_extstate = bench(known_good_hc_dst, ID__LZ4_COMPRESS_FAST_EXTSTATE, iterations, hc_src,            dst,   src_size, max_dst_size, hc_src_comp_size);
342   //uint64_t time_taken_hc__generic       = bench(known_good_hc_dst, ID__LZ4_COMPRESS_GENERIC,       iterations, hc_src,            dst,   src_size, max_dst_size, hc_src_comp_size);
343   uint64_t time_taken_hc__decomp_safe   = bench(hc_src,            ID__LZ4_DECOMPRESS_SAFE,        iterations, known_good_hc_dst, dst_d, src_size, max_dst_size, hc_src_comp_size);
344   uint64_t time_taken_hc__decomp_fast   = bench(hc_src,            ID__LZ4_DECOMPRESS_FAST,        iterations, known_good_hc_dst, dst_d, src_size, max_dst_size, hc_src_comp_size);
345 
346   // Report and leave.
347   setlocale(LC_ALL, "");
348   const char *format        = "|%-14s|%-30s|%'14.9f|%'16d|%'14llu|%'13.2f%%|\n";
349   const char *header_format = "|%-14s|%-30s|%14s|%16s|%14s|%14s|\n";
350   const char *separator     = "+--------------+------------------------------+--------------+----------------+--------------+--------------+\n";
351   uint64_t iterllu = (uint64_t)iterations;
352   printf("\n");
353   printf("%s", separator);
354   printf(header_format, "Source", "Function Benchmarked", "Total Seconds", "Iterations/sec", "ns/Iteration", "% of default");
355   printf("%s", separator);
356   printf(format, "Normal Text", "LZ4_compress_default()",       (double)time_taken__default       / BILLION, (int)(iterations / ((double)time_taken__default       /BILLION)), time_taken__default       / iterllu, (double)time_taken__default       * 100 / time_taken__default);
357   printf(format, "Normal Text", "LZ4_compress_fast()",          (double)time_taken__fast          / BILLION, (int)(iterations / ((double)time_taken__fast          /BILLION)), time_taken__fast          / iterllu, (double)time_taken__fast          * 100 / time_taken__default);
358   printf(format, "Normal Text", "LZ4_compress_fast_extState()", (double)time_taken__fast_extstate / BILLION, (int)(iterations / ((double)time_taken__fast_extstate /BILLION)), time_taken__fast_extstate / iterllu, (double)time_taken__fast_extstate * 100 / time_taken__default);
359   //printf(format, "Normal Text", "LZ4_compress_generic()",       (double)time_taken__generic       / BILLION, (int)(iterations / ((double)time_taken__generic       /BILLION)), (int)time_taken__generic       / iterations, (double)time_taken__generic       * 100 / time_taken__default);
360   printf(format, "Normal Text", "LZ4_decompress_safe()",        (double)time_taken__decomp_safe   / BILLION, (int)(iterations / ((double)time_taken__decomp_safe   /BILLION)), time_taken__decomp_safe   / iterllu, (double)time_taken__decomp_safe   * 100 / time_taken__default);
361   printf(format, "Normal Text", "LZ4_decompress_fast()",        (double)time_taken__decomp_fast   / BILLION, (int)(iterations / ((double)time_taken__decomp_fast   /BILLION)), time_taken__decomp_fast   / iterllu, (double)time_taken__decomp_fast   * 100 / time_taken__default);
362   printf(header_format, "", "", "", "", "", "");
363   printf(format, "Compressible", "LZ4_compress_default()",       (double)time_taken_hc__default       / BILLION, (int)(iterations / ((double)time_taken_hc__default       /BILLION)), time_taken_hc__default       / iterllu, (double)time_taken_hc__default       * 100 / time_taken_hc__default);
364   printf(format, "Compressible", "LZ4_compress_fast()",          (double)time_taken_hc__fast          / BILLION, (int)(iterations / ((double)time_taken_hc__fast          /BILLION)), time_taken_hc__fast          / iterllu, (double)time_taken_hc__fast          * 100 / time_taken_hc__default);
365   printf(format, "Compressible", "LZ4_compress_fast_extState()", (double)time_taken_hc__fast_extstate / BILLION, (int)(iterations / ((double)time_taken_hc__fast_extstate /BILLION)), time_taken_hc__fast_extstate / iterllu, (double)time_taken_hc__fast_extstate * 100 / time_taken_hc__default);
366   //printf(format, "Compressible", "LZ4_compress_generic()",       (double)time_taken_hc__generic       / BILLION, (int)(iterations / ((double)time_taken_hc__generic       /BILLION)), (int)time_taken_hc__generic       / iterations, (double)time_taken_hc__generic       * 100 / time_taken_hc__default);
367   printf(format, "Compressible", "LZ4_decompress_safe()",        (double)time_taken_hc__decomp_safe   / BILLION, (int)(iterations / ((double)time_taken_hc__decomp_safe   /BILLION)), time_taken_hc__decomp_safe   / iterllu, (double)time_taken_hc__decomp_safe   * 100 / time_taken_hc__default);
368   printf(format, "Compressible", "LZ4_decompress_fast()",        (double)time_taken_hc__decomp_fast   / BILLION, (int)(iterations / ((double)time_taken_hc__decomp_fast   /BILLION)), time_taken_hc__decomp_fast   / iterllu, (double)time_taken_hc__decomp_fast   * 100 / time_taken_hc__default);
369   printf("%s", separator);
370   printf("\n");
371   printf("All done, ran %d iterations per test.\n", iterations);
372   return 0;
373 }
374