1*86ee64e7SAndroid Build Coastguard Workerdiff --git a/crc32.c b/crc32.c 2*86ee64e7SAndroid Build Coastguard Workerindex 9580440c0e6b..9162429cc7b4 100644 3*86ee64e7SAndroid Build Coastguard Worker--- a/crc32.c 4*86ee64e7SAndroid Build Coastguard Worker+++ b/crc32.c 5*86ee64e7SAndroid Build Coastguard Worker@@ -28,6 +28,8 @@ 6*86ee64e7SAndroid Build Coastguard Worker # endif /* !DYNAMIC_CRC_TABLE */ 7*86ee64e7SAndroid Build Coastguard Worker #endif /* MAKECRCH */ 8*86ee64e7SAndroid Build Coastguard Worker 9*86ee64e7SAndroid Build Coastguard Worker+#include "deflate.h" 10*86ee64e7SAndroid Build Coastguard Worker+#include "x86.h" 11*86ee64e7SAndroid Build Coastguard Worker #include "zutil.h" /* for STDC and FAR definitions */ 12*86ee64e7SAndroid Build Coastguard Worker 13*86ee64e7SAndroid Build Coastguard Worker /* Definitions for doing the crc four data bytes at a time. */ 14*86ee64e7SAndroid Build Coastguard Worker@@ -440,3 +442,28 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) 15*86ee64e7SAndroid Build Coastguard Worker { 16*86ee64e7SAndroid Build Coastguard Worker return crc32_combine_(crc1, crc2, len2); 17*86ee64e7SAndroid Build Coastguard Worker } 18*86ee64e7SAndroid Build Coastguard Worker+ 19*86ee64e7SAndroid Build Coastguard Worker+ZLIB_INTERNAL void crc_reset(deflate_state *const s) 20*86ee64e7SAndroid Build Coastguard Worker+{ 21*86ee64e7SAndroid Build Coastguard Worker+ if (x86_cpu_enable_simd) { 22*86ee64e7SAndroid Build Coastguard Worker+ crc_fold_init(s); 23*86ee64e7SAndroid Build Coastguard Worker+ return; 24*86ee64e7SAndroid Build Coastguard Worker+ } 25*86ee64e7SAndroid Build Coastguard Worker+ s->strm->adler = crc32(0L, Z_NULL, 0); 26*86ee64e7SAndroid Build Coastguard Worker+} 27*86ee64e7SAndroid Build Coastguard Worker+ 28*86ee64e7SAndroid Build Coastguard Worker+ZLIB_INTERNAL void crc_finalize(deflate_state *const s) 29*86ee64e7SAndroid Build Coastguard Worker+{ 30*86ee64e7SAndroid Build Coastguard Worker+ if (x86_cpu_enable_simd) 31*86ee64e7SAndroid Build Coastguard Worker+ s->strm->adler = crc_fold_512to32(s); 32*86ee64e7SAndroid Build Coastguard Worker+} 33*86ee64e7SAndroid Build Coastguard Worker+ 34*86ee64e7SAndroid Build Coastguard Worker+ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) 35*86ee64e7SAndroid Build Coastguard Worker+{ 36*86ee64e7SAndroid Build Coastguard Worker+ if (x86_cpu_enable_simd) { 37*86ee64e7SAndroid Build Coastguard Worker+ crc_fold_copy(strm->state, dst, strm->next_in, size); 38*86ee64e7SAndroid Build Coastguard Worker+ return; 39*86ee64e7SAndroid Build Coastguard Worker+ } 40*86ee64e7SAndroid Build Coastguard Worker+ zmemcpy(dst, strm->next_in, size); 41*86ee64e7SAndroid Build Coastguard Worker+ strm->adler = crc32(strm->adler, dst, size); 42*86ee64e7SAndroid Build Coastguard Worker+} 43*86ee64e7SAndroid Build Coastguard Workerdiff --git a/crc_folding.c b/crc_folding.c 44*86ee64e7SAndroid Build Coastguard Workernew file mode 100644 45*86ee64e7SAndroid Build Coastguard Workerindex 000000000000..48d77744aaf4 46*86ee64e7SAndroid Build Coastguard Worker--- /dev/null 47*86ee64e7SAndroid Build Coastguard Worker+++ b/crc_folding.c 48*86ee64e7SAndroid Build Coastguard Worker@@ -0,0 +1,493 @@ 49*86ee64e7SAndroid Build Coastguard Worker+/* 50*86ee64e7SAndroid Build Coastguard Worker+ * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ 51*86ee64e7SAndroid Build Coastguard Worker+ * instruction. 52*86ee64e7SAndroid Build Coastguard Worker+ * 53*86ee64e7SAndroid Build Coastguard Worker+ * A white paper describing this algorithm can be found at: 54*86ee64e7SAndroid Build Coastguard Worker+ * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf 55*86ee64e7SAndroid Build Coastguard Worker+ * 56*86ee64e7SAndroid Build Coastguard Worker+ * Copyright (C) 2013 Intel Corporation. All rights reserved. 57*86ee64e7SAndroid Build Coastguard Worker+ * Authors: 58*86ee64e7SAndroid Build Coastguard Worker+ * Wajdi Feghali <[email protected]> 59*86ee64e7SAndroid Build Coastguard Worker+ * Jim Guilford <[email protected]> 60*86ee64e7SAndroid Build Coastguard Worker+ * Vinodh Gopal <[email protected]> 61*86ee64e7SAndroid Build Coastguard Worker+ * Erdinc Ozturk <[email protected]> 62*86ee64e7SAndroid Build Coastguard Worker+ * Jim Kukunas <[email protected]> 63*86ee64e7SAndroid Build Coastguard Worker+ * 64*86ee64e7SAndroid Build Coastguard Worker+ * For conditions of distribution and use, see copyright notice in zlib.h 65*86ee64e7SAndroid Build Coastguard Worker+ */ 66*86ee64e7SAndroid Build Coastguard Worker+ 67*86ee64e7SAndroid Build Coastguard Worker+#include "deflate.h" 68*86ee64e7SAndroid Build Coastguard Worker+ 69*86ee64e7SAndroid Build Coastguard Worker+#include <inttypes.h> 70*86ee64e7SAndroid Build Coastguard Worker+#include <emmintrin.h> 71*86ee64e7SAndroid Build Coastguard Worker+#include <immintrin.h> 72*86ee64e7SAndroid Build Coastguard Worker+#include <wmmintrin.h> 73*86ee64e7SAndroid Build Coastguard Worker+ 74*86ee64e7SAndroid Build Coastguard Worker+#define CRC_LOAD(s) \ 75*86ee64e7SAndroid Build Coastguard Worker+ do { \ 76*86ee64e7SAndroid Build Coastguard Worker+ __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);\ 77*86ee64e7SAndroid Build Coastguard Worker+ __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);\ 78*86ee64e7SAndroid Build Coastguard Worker+ __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);\ 79*86ee64e7SAndroid Build Coastguard Worker+ __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);\ 80*86ee64e7SAndroid Build Coastguard Worker+ __m128i xmm_crc_part = _mm_loadu_si128((__m128i *)s->crc0 + 4); 81*86ee64e7SAndroid Build Coastguard Worker+ 82*86ee64e7SAndroid Build Coastguard Worker+#define CRC_SAVE(s) \ 83*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)s->crc0 + 0, xmm_crc0);\ 84*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)s->crc0 + 1, xmm_crc1);\ 85*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)s->crc0 + 2, xmm_crc2);\ 86*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)s->crc0 + 3, xmm_crc3);\ 87*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)s->crc0 + 4, xmm_crc_part);\ 88*86ee64e7SAndroid Build Coastguard Worker+ } while (0); 89*86ee64e7SAndroid Build Coastguard Worker+ 90*86ee64e7SAndroid Build Coastguard Worker+ZLIB_INTERNAL void crc_fold_init(deflate_state *const s) 91*86ee64e7SAndroid Build Coastguard Worker+{ 92*86ee64e7SAndroid Build Coastguard Worker+ CRC_LOAD(s) 93*86ee64e7SAndroid Build Coastguard Worker+ 94*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc0 = _mm_cvtsi32_si128(0x9db42487); 95*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc1 = _mm_setzero_si128(); 96*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = _mm_setzero_si128(); 97*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_setzero_si128(); 98*86ee64e7SAndroid Build Coastguard Worker+ 99*86ee64e7SAndroid Build Coastguard Worker+ CRC_SAVE(s) 100*86ee64e7SAndroid Build Coastguard Worker+ 101*86ee64e7SAndroid Build Coastguard Worker+ s->strm->adler = 0; 102*86ee64e7SAndroid Build Coastguard Worker+} 103*86ee64e7SAndroid Build Coastguard Worker+ 104*86ee64e7SAndroid Build Coastguard Worker+local void fold_1(deflate_state *const s, 105*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc0, __m128i *xmm_crc1, 106*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc2, __m128i *xmm_crc3) 107*86ee64e7SAndroid Build Coastguard Worker+{ 108*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_fold4 = _mm_set_epi32( 109*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0x54442bd4, 110*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0xc6e41596); 111*86ee64e7SAndroid Build Coastguard Worker+ 112*86ee64e7SAndroid Build Coastguard Worker+ __m128i x_tmp3; 113*86ee64e7SAndroid Build Coastguard Worker+ __m128 ps_crc0, ps_crc3, ps_res; 114*86ee64e7SAndroid Build Coastguard Worker+ 115*86ee64e7SAndroid Build Coastguard Worker+ x_tmp3 = *xmm_crc3; 116*86ee64e7SAndroid Build Coastguard Worker+ 117*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = *xmm_crc0; 118*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); 119*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); 120*86ee64e7SAndroid Build Coastguard Worker+ ps_crc0 = _mm_castsi128_ps(*xmm_crc0); 121*86ee64e7SAndroid Build Coastguard Worker+ ps_crc3 = _mm_castsi128_ps(*xmm_crc3); 122*86ee64e7SAndroid Build Coastguard Worker+ ps_res = _mm_xor_ps(ps_crc0, ps_crc3); 123*86ee64e7SAndroid Build Coastguard Worker+ 124*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = *xmm_crc1; 125*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = *xmm_crc2; 126*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = x_tmp3; 127*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_castps_si128(ps_res); 128*86ee64e7SAndroid Build Coastguard Worker+} 129*86ee64e7SAndroid Build Coastguard Worker+ 130*86ee64e7SAndroid Build Coastguard Worker+local void fold_2(deflate_state *const s, 131*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc0, __m128i *xmm_crc1, 132*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc2, __m128i *xmm_crc3) 133*86ee64e7SAndroid Build Coastguard Worker+{ 134*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_fold4 = _mm_set_epi32( 135*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0x54442bd4, 136*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0xc6e41596); 137*86ee64e7SAndroid Build Coastguard Worker+ 138*86ee64e7SAndroid Build Coastguard Worker+ __m128i x_tmp3, x_tmp2; 139*86ee64e7SAndroid Build Coastguard Worker+ __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res31, ps_res20; 140*86ee64e7SAndroid Build Coastguard Worker+ 141*86ee64e7SAndroid Build Coastguard Worker+ x_tmp3 = *xmm_crc3; 142*86ee64e7SAndroid Build Coastguard Worker+ x_tmp2 = *xmm_crc2; 143*86ee64e7SAndroid Build Coastguard Worker+ 144*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = *xmm_crc1; 145*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); 146*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); 147*86ee64e7SAndroid Build Coastguard Worker+ ps_crc3 = _mm_castsi128_ps(*xmm_crc3); 148*86ee64e7SAndroid Build Coastguard Worker+ ps_crc1 = _mm_castsi128_ps(*xmm_crc1); 149*86ee64e7SAndroid Build Coastguard Worker+ ps_res31= _mm_xor_ps(ps_crc3, ps_crc1); 150*86ee64e7SAndroid Build Coastguard Worker+ 151*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = *xmm_crc0; 152*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); 153*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10); 154*86ee64e7SAndroid Build Coastguard Worker+ ps_crc0 = _mm_castsi128_ps(*xmm_crc0); 155*86ee64e7SAndroid Build Coastguard Worker+ ps_crc2 = _mm_castsi128_ps(*xmm_crc2); 156*86ee64e7SAndroid Build Coastguard Worker+ ps_res20= _mm_xor_ps(ps_crc0, ps_crc2); 157*86ee64e7SAndroid Build Coastguard Worker+ 158*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = x_tmp2; 159*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = x_tmp3; 160*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_castps_si128(ps_res20); 161*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_castps_si128(ps_res31); 162*86ee64e7SAndroid Build Coastguard Worker+} 163*86ee64e7SAndroid Build Coastguard Worker+ 164*86ee64e7SAndroid Build Coastguard Worker+local void fold_3(deflate_state *const s, 165*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc0, __m128i *xmm_crc1, 166*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc2, __m128i *xmm_crc3) 167*86ee64e7SAndroid Build Coastguard Worker+{ 168*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_fold4 = _mm_set_epi32( 169*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0x54442bd4, 170*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0xc6e41596); 171*86ee64e7SAndroid Build Coastguard Worker+ 172*86ee64e7SAndroid Build Coastguard Worker+ __m128i x_tmp3; 173*86ee64e7SAndroid Build Coastguard Worker+ __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res32, ps_res21, ps_res10; 174*86ee64e7SAndroid Build Coastguard Worker+ 175*86ee64e7SAndroid Build Coastguard Worker+ x_tmp3 = *xmm_crc3; 176*86ee64e7SAndroid Build Coastguard Worker+ 177*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = *xmm_crc2; 178*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01); 179*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); 180*86ee64e7SAndroid Build Coastguard Worker+ ps_crc2 = _mm_castsi128_ps(*xmm_crc2); 181*86ee64e7SAndroid Build Coastguard Worker+ ps_crc3 = _mm_castsi128_ps(*xmm_crc3); 182*86ee64e7SAndroid Build Coastguard Worker+ ps_res32 = _mm_xor_ps(ps_crc2, ps_crc3); 183*86ee64e7SAndroid Build Coastguard Worker+ 184*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = *xmm_crc1; 185*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); 186*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10); 187*86ee64e7SAndroid Build Coastguard Worker+ ps_crc1 = _mm_castsi128_ps(*xmm_crc1); 188*86ee64e7SAndroid Build Coastguard Worker+ ps_crc2 = _mm_castsi128_ps(*xmm_crc2); 189*86ee64e7SAndroid Build Coastguard Worker+ ps_res21= _mm_xor_ps(ps_crc1, ps_crc2); 190*86ee64e7SAndroid Build Coastguard Worker+ 191*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = *xmm_crc0; 192*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); 193*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x10); 194*86ee64e7SAndroid Build Coastguard Worker+ ps_crc0 = _mm_castsi128_ps(*xmm_crc0); 195*86ee64e7SAndroid Build Coastguard Worker+ ps_crc1 = _mm_castsi128_ps(*xmm_crc1); 196*86ee64e7SAndroid Build Coastguard Worker+ ps_res10= _mm_xor_ps(ps_crc0, ps_crc1); 197*86ee64e7SAndroid Build Coastguard Worker+ 198*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = x_tmp3; 199*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = _mm_castps_si128(ps_res10); 200*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_castps_si128(ps_res21); 201*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_castps_si128(ps_res32); 202*86ee64e7SAndroid Build Coastguard Worker+} 203*86ee64e7SAndroid Build Coastguard Worker+ 204*86ee64e7SAndroid Build Coastguard Worker+local void fold_4(deflate_state *const s, 205*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc0, __m128i *xmm_crc1, 206*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc2, __m128i *xmm_crc3) 207*86ee64e7SAndroid Build Coastguard Worker+{ 208*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_fold4 = _mm_set_epi32( 209*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0x54442bd4, 210*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0xc6e41596); 211*86ee64e7SAndroid Build Coastguard Worker+ 212*86ee64e7SAndroid Build Coastguard Worker+ __m128i x_tmp0, x_tmp1, x_tmp2, x_tmp3; 213*86ee64e7SAndroid Build Coastguard Worker+ __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3; 214*86ee64e7SAndroid Build Coastguard Worker+ __m128 ps_t0, ps_t1, ps_t2, ps_t3; 215*86ee64e7SAndroid Build Coastguard Worker+ __m128 ps_res0, ps_res1, ps_res2, ps_res3; 216*86ee64e7SAndroid Build Coastguard Worker+ 217*86ee64e7SAndroid Build Coastguard Worker+ x_tmp0 = *xmm_crc0; 218*86ee64e7SAndroid Build Coastguard Worker+ x_tmp1 = *xmm_crc1; 219*86ee64e7SAndroid Build Coastguard Worker+ x_tmp2 = *xmm_crc2; 220*86ee64e7SAndroid Build Coastguard Worker+ x_tmp3 = *xmm_crc3; 221*86ee64e7SAndroid Build Coastguard Worker+ 222*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); 223*86ee64e7SAndroid Build Coastguard Worker+ x_tmp0 = _mm_clmulepi64_si128(x_tmp0, xmm_fold4, 0x10); 224*86ee64e7SAndroid Build Coastguard Worker+ ps_crc0 = _mm_castsi128_ps(*xmm_crc0); 225*86ee64e7SAndroid Build Coastguard Worker+ ps_t0 = _mm_castsi128_ps(x_tmp0); 226*86ee64e7SAndroid Build Coastguard Worker+ ps_res0 = _mm_xor_ps(ps_crc0, ps_t0); 227*86ee64e7SAndroid Build Coastguard Worker+ 228*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); 229*86ee64e7SAndroid Build Coastguard Worker+ x_tmp1 = _mm_clmulepi64_si128(x_tmp1, xmm_fold4, 0x10); 230*86ee64e7SAndroid Build Coastguard Worker+ ps_crc1 = _mm_castsi128_ps(*xmm_crc1); 231*86ee64e7SAndroid Build Coastguard Worker+ ps_t1 = _mm_castsi128_ps(x_tmp1); 232*86ee64e7SAndroid Build Coastguard Worker+ ps_res1 = _mm_xor_ps(ps_crc1, ps_t1); 233*86ee64e7SAndroid Build Coastguard Worker+ 234*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01); 235*86ee64e7SAndroid Build Coastguard Worker+ x_tmp2 = _mm_clmulepi64_si128(x_tmp2, xmm_fold4, 0x10); 236*86ee64e7SAndroid Build Coastguard Worker+ ps_crc2 = _mm_castsi128_ps(*xmm_crc2); 237*86ee64e7SAndroid Build Coastguard Worker+ ps_t2 = _mm_castsi128_ps(x_tmp2); 238*86ee64e7SAndroid Build Coastguard Worker+ ps_res2 = _mm_xor_ps(ps_crc2, ps_t2); 239*86ee64e7SAndroid Build Coastguard Worker+ 240*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x01); 241*86ee64e7SAndroid Build Coastguard Worker+ x_tmp3 = _mm_clmulepi64_si128(x_tmp3, xmm_fold4, 0x10); 242*86ee64e7SAndroid Build Coastguard Worker+ ps_crc3 = _mm_castsi128_ps(*xmm_crc3); 243*86ee64e7SAndroid Build Coastguard Worker+ ps_t3 = _mm_castsi128_ps(x_tmp3); 244*86ee64e7SAndroid Build Coastguard Worker+ ps_res3 = _mm_xor_ps(ps_crc3, ps_t3); 245*86ee64e7SAndroid Build Coastguard Worker+ 246*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = _mm_castps_si128(ps_res0); 247*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = _mm_castps_si128(ps_res1); 248*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_castps_si128(ps_res2); 249*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_castps_si128(ps_res3); 250*86ee64e7SAndroid Build Coastguard Worker+} 251*86ee64e7SAndroid Build Coastguard Worker+ 252*86ee64e7SAndroid Build Coastguard Worker+local const unsigned zalign(32) pshufb_shf_table[60] = { 253*86ee64e7SAndroid Build Coastguard Worker+ 0x84838281,0x88878685,0x8c8b8a89,0x008f8e8d, /* shl 15 (16 - 1)/shr1 */ 254*86ee64e7SAndroid Build Coastguard Worker+ 0x85848382,0x89888786,0x8d8c8b8a,0x01008f8e, /* shl 14 (16 - 3)/shr2 */ 255*86ee64e7SAndroid Build Coastguard Worker+ 0x86858483,0x8a898887,0x8e8d8c8b,0x0201008f, /* shl 13 (16 - 4)/shr3 */ 256*86ee64e7SAndroid Build Coastguard Worker+ 0x87868584,0x8b8a8988,0x8f8e8d8c,0x03020100, /* shl 12 (16 - 4)/shr4 */ 257*86ee64e7SAndroid Build Coastguard Worker+ 0x88878685,0x8c8b8a89,0x008f8e8d,0x04030201, /* shl 11 (16 - 5)/shr5 */ 258*86ee64e7SAndroid Build Coastguard Worker+ 0x89888786,0x8d8c8b8a,0x01008f8e,0x05040302, /* shl 10 (16 - 6)/shr6 */ 259*86ee64e7SAndroid Build Coastguard Worker+ 0x8a898887,0x8e8d8c8b,0x0201008f,0x06050403, /* shl 9 (16 - 7)/shr7 */ 260*86ee64e7SAndroid Build Coastguard Worker+ 0x8b8a8988,0x8f8e8d8c,0x03020100,0x07060504, /* shl 8 (16 - 8)/shr8 */ 261*86ee64e7SAndroid Build Coastguard Worker+ 0x8c8b8a89,0x008f8e8d,0x04030201,0x08070605, /* shl 7 (16 - 9)/shr9 */ 262*86ee64e7SAndroid Build Coastguard Worker+ 0x8d8c8b8a,0x01008f8e,0x05040302,0x09080706, /* shl 6 (16 -10)/shr10*/ 263*86ee64e7SAndroid Build Coastguard Worker+ 0x8e8d8c8b,0x0201008f,0x06050403,0x0a090807, /* shl 5 (16 -11)/shr11*/ 264*86ee64e7SAndroid Build Coastguard Worker+ 0x8f8e8d8c,0x03020100,0x07060504,0x0b0a0908, /* shl 4 (16 -12)/shr12*/ 265*86ee64e7SAndroid Build Coastguard Worker+ 0x008f8e8d,0x04030201,0x08070605,0x0c0b0a09, /* shl 3 (16 -13)/shr13*/ 266*86ee64e7SAndroid Build Coastguard Worker+ 0x01008f8e,0x05040302,0x09080706,0x0d0c0b0a, /* shl 2 (16 -14)/shr14*/ 267*86ee64e7SAndroid Build Coastguard Worker+ 0x0201008f,0x06050403,0x0a090807,0x0e0d0c0b /* shl 1 (16 -15)/shr15*/ 268*86ee64e7SAndroid Build Coastguard Worker+}; 269*86ee64e7SAndroid Build Coastguard Worker+ 270*86ee64e7SAndroid Build Coastguard Worker+local void partial_fold(deflate_state *const s, const size_t len, 271*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc0, __m128i *xmm_crc1, 272*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc2, __m128i *xmm_crc3, 273*86ee64e7SAndroid Build Coastguard Worker+ __m128i *xmm_crc_part) 274*86ee64e7SAndroid Build Coastguard Worker+{ 275*86ee64e7SAndroid Build Coastguard Worker+ 276*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_fold4 = _mm_set_epi32( 277*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0x54442bd4, 278*86ee64e7SAndroid Build Coastguard Worker+ 0x00000001, 0xc6e41596); 279*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_mask3 = _mm_set1_epi32(0x80808080); 280*86ee64e7SAndroid Build Coastguard Worker+ 281*86ee64e7SAndroid Build Coastguard Worker+ __m128i xmm_shl, xmm_shr, xmm_tmp1, xmm_tmp2, xmm_tmp3; 282*86ee64e7SAndroid Build Coastguard Worker+ __m128i xmm_a0_0, xmm_a0_1; 283*86ee64e7SAndroid Build Coastguard Worker+ __m128 ps_crc3, psa0_0, psa0_1, ps_res; 284*86ee64e7SAndroid Build Coastguard Worker+ 285*86ee64e7SAndroid Build Coastguard Worker+ xmm_shl = _mm_load_si128((__m128i *)pshufb_shf_table + (len - 1)); 286*86ee64e7SAndroid Build Coastguard Worker+ xmm_shr = xmm_shl; 287*86ee64e7SAndroid Build Coastguard Worker+ xmm_shr = _mm_xor_si128(xmm_shr, xmm_mask3); 288*86ee64e7SAndroid Build Coastguard Worker+ 289*86ee64e7SAndroid Build Coastguard Worker+ xmm_a0_0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shl); 290*86ee64e7SAndroid Build Coastguard Worker+ 291*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shr); 292*86ee64e7SAndroid Build Coastguard Worker+ xmm_tmp1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shl); 293*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc0 = _mm_or_si128(*xmm_crc0, xmm_tmp1); 294*86ee64e7SAndroid Build Coastguard Worker+ 295*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shr); 296*86ee64e7SAndroid Build Coastguard Worker+ xmm_tmp2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shl); 297*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc1 = _mm_or_si128(*xmm_crc1, xmm_tmp2); 298*86ee64e7SAndroid Build Coastguard Worker+ 299*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shr); 300*86ee64e7SAndroid Build Coastguard Worker+ xmm_tmp3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shl); 301*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc2 = _mm_or_si128(*xmm_crc2, xmm_tmp3); 302*86ee64e7SAndroid Build Coastguard Worker+ 303*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shr); 304*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc_part = _mm_shuffle_epi8(*xmm_crc_part, xmm_shl); 305*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_or_si128(*xmm_crc3, *xmm_crc_part); 306*86ee64e7SAndroid Build Coastguard Worker+ 307*86ee64e7SAndroid Build Coastguard Worker+ xmm_a0_1 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x10); 308*86ee64e7SAndroid Build Coastguard Worker+ xmm_a0_0 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x01); 309*86ee64e7SAndroid Build Coastguard Worker+ 310*86ee64e7SAndroid Build Coastguard Worker+ ps_crc3 = _mm_castsi128_ps(*xmm_crc3); 311*86ee64e7SAndroid Build Coastguard Worker+ psa0_0 = _mm_castsi128_ps(xmm_a0_0); 312*86ee64e7SAndroid Build Coastguard Worker+ psa0_1 = _mm_castsi128_ps(xmm_a0_1); 313*86ee64e7SAndroid Build Coastguard Worker+ 314*86ee64e7SAndroid Build Coastguard Worker+ ps_res = _mm_xor_ps(ps_crc3, psa0_0); 315*86ee64e7SAndroid Build Coastguard Worker+ ps_res = _mm_xor_ps(ps_res, psa0_1); 316*86ee64e7SAndroid Build Coastguard Worker+ 317*86ee64e7SAndroid Build Coastguard Worker+ *xmm_crc3 = _mm_castps_si128(ps_res); 318*86ee64e7SAndroid Build Coastguard Worker+} 319*86ee64e7SAndroid Build Coastguard Worker+ 320*86ee64e7SAndroid Build Coastguard Worker+ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, 321*86ee64e7SAndroid Build Coastguard Worker+ unsigned char *dst, const unsigned char *src, long len) 322*86ee64e7SAndroid Build Coastguard Worker+{ 323*86ee64e7SAndroid Build Coastguard Worker+ unsigned long algn_diff; 324*86ee64e7SAndroid Build Coastguard Worker+ __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; 325*86ee64e7SAndroid Build Coastguard Worker+ 326*86ee64e7SAndroid Build Coastguard Worker+ CRC_LOAD(s) 327*86ee64e7SAndroid Build Coastguard Worker+ 328*86ee64e7SAndroid Build Coastguard Worker+ if (len < 16) { 329*86ee64e7SAndroid Build Coastguard Worker+ if (len == 0) 330*86ee64e7SAndroid Build Coastguard Worker+ return; 331*86ee64e7SAndroid Build Coastguard Worker+ goto partial; 332*86ee64e7SAndroid Build Coastguard Worker+ } 333*86ee64e7SAndroid Build Coastguard Worker+ 334*86ee64e7SAndroid Build Coastguard Worker+ algn_diff = 0 - (uintptr_t)src & 0xF; 335*86ee64e7SAndroid Build Coastguard Worker+ if (algn_diff) { 336*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc_part = _mm_loadu_si128((__m128i *)src); 337*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst, xmm_crc_part); 338*86ee64e7SAndroid Build Coastguard Worker+ 339*86ee64e7SAndroid Build Coastguard Worker+ dst += algn_diff; 340*86ee64e7SAndroid Build Coastguard Worker+ src += algn_diff; 341*86ee64e7SAndroid Build Coastguard Worker+ len -= algn_diff; 342*86ee64e7SAndroid Build Coastguard Worker+ 343*86ee64e7SAndroid Build Coastguard Worker+ partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, 344*86ee64e7SAndroid Build Coastguard Worker+ &xmm_crc_part); 345*86ee64e7SAndroid Build Coastguard Worker+ } 346*86ee64e7SAndroid Build Coastguard Worker+ 347*86ee64e7SAndroid Build Coastguard Worker+ while ((len -= 64) >= 0) { 348*86ee64e7SAndroid Build Coastguard Worker+ xmm_t0 = _mm_load_si128((__m128i *)src); 349*86ee64e7SAndroid Build Coastguard Worker+ xmm_t1 = _mm_load_si128((__m128i *)src + 1); 350*86ee64e7SAndroid Build Coastguard Worker+ xmm_t2 = _mm_load_si128((__m128i *)src + 2); 351*86ee64e7SAndroid Build Coastguard Worker+ xmm_t3 = _mm_load_si128((__m128i *)src + 3); 352*86ee64e7SAndroid Build Coastguard Worker+ 353*86ee64e7SAndroid Build Coastguard Worker+ fold_4(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); 354*86ee64e7SAndroid Build Coastguard Worker+ 355*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst, xmm_t0); 356*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst + 1, xmm_t1); 357*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst + 2, xmm_t2); 358*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst + 3, xmm_t3); 359*86ee64e7SAndroid Build Coastguard Worker+ 360*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc0 = _mm_xor_si128(xmm_crc0, xmm_t0); 361*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t1); 362*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t2); 363*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t3); 364*86ee64e7SAndroid Build Coastguard Worker+ 365*86ee64e7SAndroid Build Coastguard Worker+ src += 64; 366*86ee64e7SAndroid Build Coastguard Worker+ dst += 64; 367*86ee64e7SAndroid Build Coastguard Worker+ } 368*86ee64e7SAndroid Build Coastguard Worker+ 369*86ee64e7SAndroid Build Coastguard Worker+ /* 370*86ee64e7SAndroid Build Coastguard Worker+ * len = num bytes left - 64 371*86ee64e7SAndroid Build Coastguard Worker+ */ 372*86ee64e7SAndroid Build Coastguard Worker+ if (len + 16 >= 0) { 373*86ee64e7SAndroid Build Coastguard Worker+ len += 16; 374*86ee64e7SAndroid Build Coastguard Worker+ 375*86ee64e7SAndroid Build Coastguard Worker+ xmm_t0 = _mm_load_si128((__m128i *)src); 376*86ee64e7SAndroid Build Coastguard Worker+ xmm_t1 = _mm_load_si128((__m128i *)src + 1); 377*86ee64e7SAndroid Build Coastguard Worker+ xmm_t2 = _mm_load_si128((__m128i *)src + 2); 378*86ee64e7SAndroid Build Coastguard Worker+ 379*86ee64e7SAndroid Build Coastguard Worker+ fold_3(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); 380*86ee64e7SAndroid Build Coastguard Worker+ 381*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst, xmm_t0); 382*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst + 1, xmm_t1); 383*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst + 2, xmm_t2); 384*86ee64e7SAndroid Build Coastguard Worker+ 385*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t0); 386*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t1); 387*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t2); 388*86ee64e7SAndroid Build Coastguard Worker+ 389*86ee64e7SAndroid Build Coastguard Worker+ if (len == 0) 390*86ee64e7SAndroid Build Coastguard Worker+ goto done; 391*86ee64e7SAndroid Build Coastguard Worker+ 392*86ee64e7SAndroid Build Coastguard Worker+ dst += 48; 393*86ee64e7SAndroid Build Coastguard Worker+ src += 48; 394*86ee64e7SAndroid Build Coastguard Worker+ } else if (len + 32 >= 0) { 395*86ee64e7SAndroid Build Coastguard Worker+ len += 32; 396*86ee64e7SAndroid Build Coastguard Worker+ 397*86ee64e7SAndroid Build Coastguard Worker+ xmm_t0 = _mm_load_si128((__m128i *)src); 398*86ee64e7SAndroid Build Coastguard Worker+ xmm_t1 = _mm_load_si128((__m128i *)src + 1); 399*86ee64e7SAndroid Build Coastguard Worker+ 400*86ee64e7SAndroid Build Coastguard Worker+ fold_2(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); 401*86ee64e7SAndroid Build Coastguard Worker+ 402*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst, xmm_t0); 403*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst + 1, xmm_t1); 404*86ee64e7SAndroid Build Coastguard Worker+ 405*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t0); 406*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t1); 407*86ee64e7SAndroid Build Coastguard Worker+ 408*86ee64e7SAndroid Build Coastguard Worker+ if (len == 0) 409*86ee64e7SAndroid Build Coastguard Worker+ goto done; 410*86ee64e7SAndroid Build Coastguard Worker+ 411*86ee64e7SAndroid Build Coastguard Worker+ dst += 32; 412*86ee64e7SAndroid Build Coastguard Worker+ src += 32; 413*86ee64e7SAndroid Build Coastguard Worker+ } else if (len + 48 >= 0) { 414*86ee64e7SAndroid Build Coastguard Worker+ len += 48; 415*86ee64e7SAndroid Build Coastguard Worker+ 416*86ee64e7SAndroid Build Coastguard Worker+ xmm_t0 = _mm_load_si128((__m128i *)src); 417*86ee64e7SAndroid Build Coastguard Worker+ 418*86ee64e7SAndroid Build Coastguard Worker+ fold_1(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); 419*86ee64e7SAndroid Build Coastguard Worker+ 420*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)dst, xmm_t0); 421*86ee64e7SAndroid Build Coastguard Worker+ 422*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); 423*86ee64e7SAndroid Build Coastguard Worker+ 424*86ee64e7SAndroid Build Coastguard Worker+ if (len == 0) 425*86ee64e7SAndroid Build Coastguard Worker+ goto done; 426*86ee64e7SAndroid Build Coastguard Worker+ 427*86ee64e7SAndroid Build Coastguard Worker+ dst += 16; 428*86ee64e7SAndroid Build Coastguard Worker+ src += 16; 429*86ee64e7SAndroid Build Coastguard Worker+ } else { 430*86ee64e7SAndroid Build Coastguard Worker+ len += 64; 431*86ee64e7SAndroid Build Coastguard Worker+ if (len == 0) 432*86ee64e7SAndroid Build Coastguard Worker+ goto done; 433*86ee64e7SAndroid Build Coastguard Worker+ } 434*86ee64e7SAndroid Build Coastguard Worker+ 435*86ee64e7SAndroid Build Coastguard Worker+partial: 436*86ee64e7SAndroid Build Coastguard Worker+ 437*86ee64e7SAndroid Build Coastguard Worker+#if defined(_MSC_VER) 438*86ee64e7SAndroid Build Coastguard Worker+ /* VS does not permit the use of _mm_set_epi64x in 32-bit builds */ 439*86ee64e7SAndroid Build Coastguard Worker+ { 440*86ee64e7SAndroid Build Coastguard Worker+ int32_t parts[4] = {0, 0, 0, 0}; 441*86ee64e7SAndroid Build Coastguard Worker+ memcpy(&parts, src, len); 442*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc_part = _mm_set_epi32(parts[3], parts[2], parts[1], parts[0]); 443*86ee64e7SAndroid Build Coastguard Worker+ } 444*86ee64e7SAndroid Build Coastguard Worker+#else 445*86ee64e7SAndroid Build Coastguard Worker+ { 446*86ee64e7SAndroid Build Coastguard Worker+ int64_t parts[2] = {0, 0}; 447*86ee64e7SAndroid Build Coastguard Worker+ memcpy(&parts, src, len); 448*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc_part = _mm_set_epi64x(parts[1], parts[0]); 449*86ee64e7SAndroid Build Coastguard Worker+ } 450*86ee64e7SAndroid Build Coastguard Worker+#endif 451*86ee64e7SAndroid Build Coastguard Worker+ 452*86ee64e7SAndroid Build Coastguard Worker+ zmemcpy(dst, src, len); /* TODO: Possibly generate more efficient code. */ 453*86ee64e7SAndroid Build Coastguard Worker+ partial_fold(s, len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, 454*86ee64e7SAndroid Build Coastguard Worker+ &xmm_crc_part); 455*86ee64e7SAndroid Build Coastguard Worker+done: 456*86ee64e7SAndroid Build Coastguard Worker+ CRC_SAVE(s) 457*86ee64e7SAndroid Build Coastguard Worker+} 458*86ee64e7SAndroid Build Coastguard Worker+ 459*86ee64e7SAndroid Build Coastguard Worker+local const unsigned zalign(16) crc_k[] = { 460*86ee64e7SAndroid Build Coastguard Worker+ 0xccaa009e, 0x00000000, /* rk1 */ 461*86ee64e7SAndroid Build Coastguard Worker+ 0x751997d0, 0x00000001, /* rk2 */ 462*86ee64e7SAndroid Build Coastguard Worker+ 0xccaa009e, 0x00000000, /* rk5 */ 463*86ee64e7SAndroid Build Coastguard Worker+ 0x63cd6124, 0x00000001, /* rk6 */ 464*86ee64e7SAndroid Build Coastguard Worker+ 0xf7011640, 0x00000001, /* rk7 */ 465*86ee64e7SAndroid Build Coastguard Worker+ 0xdb710640, 0x00000001 /* rk8 */ 466*86ee64e7SAndroid Build Coastguard Worker+}; 467*86ee64e7SAndroid Build Coastguard Worker+ 468*86ee64e7SAndroid Build Coastguard Worker+local const unsigned zalign(16) crc_mask[4] = { 469*86ee64e7SAndroid Build Coastguard Worker+ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 470*86ee64e7SAndroid Build Coastguard Worker+}; 471*86ee64e7SAndroid Build Coastguard Worker+ 472*86ee64e7SAndroid Build Coastguard Worker+local const unsigned zalign(16) crc_mask2[4] = { 473*86ee64e7SAndroid Build Coastguard Worker+ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF 474*86ee64e7SAndroid Build Coastguard Worker+}; 475*86ee64e7SAndroid Build Coastguard Worker+ 476*86ee64e7SAndroid Build Coastguard Worker+unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) 477*86ee64e7SAndroid Build Coastguard Worker+{ 478*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask); 479*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2); 480*86ee64e7SAndroid Build Coastguard Worker+ 481*86ee64e7SAndroid Build Coastguard Worker+ unsigned crc; 482*86ee64e7SAndroid Build Coastguard Worker+ __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold; 483*86ee64e7SAndroid Build Coastguard Worker+ 484*86ee64e7SAndroid Build Coastguard Worker+ CRC_LOAD(s) 485*86ee64e7SAndroid Build Coastguard Worker+ 486*86ee64e7SAndroid Build Coastguard Worker+ /* 487*86ee64e7SAndroid Build Coastguard Worker+ * k1 488*86ee64e7SAndroid Build Coastguard Worker+ */ 489*86ee64e7SAndroid Build Coastguard Worker+ crc_fold = _mm_load_si128((__m128i *)crc_k); 490*86ee64e7SAndroid Build Coastguard Worker+ 491*86ee64e7SAndroid Build Coastguard Worker+ x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10); 492*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01); 493*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc1 = _mm_xor_si128(xmm_crc1, x_tmp0); 494*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_crc0); 495*86ee64e7SAndroid Build Coastguard Worker+ 496*86ee64e7SAndroid Build Coastguard Worker+ x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10); 497*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01); 498*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = _mm_xor_si128(xmm_crc2, x_tmp1); 499*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_crc1); 500*86ee64e7SAndroid Build Coastguard Worker+ 501*86ee64e7SAndroid Build Coastguard Worker+ x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10); 502*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01); 503*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, x_tmp2); 504*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); 505*86ee64e7SAndroid Build Coastguard Worker+ 506*86ee64e7SAndroid Build Coastguard Worker+ /* 507*86ee64e7SAndroid Build Coastguard Worker+ * k5 508*86ee64e7SAndroid Build Coastguard Worker+ */ 509*86ee64e7SAndroid Build Coastguard Worker+ crc_fold = _mm_load_si128((__m128i *)crc_k + 1); 510*86ee64e7SAndroid Build Coastguard Worker+ 511*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc0 = xmm_crc3; 512*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); 513*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc0 = _mm_srli_si128(xmm_crc0, 8); 514*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); 515*86ee64e7SAndroid Build Coastguard Worker+ 516*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc0 = xmm_crc3; 517*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_slli_si128(xmm_crc3, 4); 518*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); 519*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); 520*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask2); 521*86ee64e7SAndroid Build Coastguard Worker+ 522*86ee64e7SAndroid Build Coastguard Worker+ /* 523*86ee64e7SAndroid Build Coastguard Worker+ * k7 524*86ee64e7SAndroid Build Coastguard Worker+ */ 525*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc1 = xmm_crc3; 526*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = xmm_crc3; 527*86ee64e7SAndroid Build Coastguard Worker+ crc_fold = _mm_load_si128((__m128i *)crc_k + 2); 528*86ee64e7SAndroid Build Coastguard Worker+ 529*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); 530*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); 531*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask); 532*86ee64e7SAndroid Build Coastguard Worker+ 533*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc2 = xmm_crc3; 534*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); 535*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); 536*86ee64e7SAndroid Build Coastguard Worker+ xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); 537*86ee64e7SAndroid Build Coastguard Worker+ 538*86ee64e7SAndroid Build Coastguard Worker+ crc = _mm_extract_epi32(xmm_crc3, 2); 539*86ee64e7SAndroid Build Coastguard Worker+ return ~crc; 540*86ee64e7SAndroid Build Coastguard Worker+ CRC_SAVE(s) 541*86ee64e7SAndroid Build Coastguard Worker+} 542*86ee64e7SAndroid Build Coastguard Workerdiff --git a/deflate.c b/deflate.c 543*86ee64e7SAndroid Build Coastguard Workerindex 1ec761448de9..aa0c9c67a6dc 100644 544*86ee64e7SAndroid Build Coastguard Worker--- a/deflate.c 545*86ee64e7SAndroid Build Coastguard Worker+++ b/deflate.c 546*86ee64e7SAndroid Build Coastguard Worker@@ -48,8 +48,9 @@ 547*86ee64e7SAndroid Build Coastguard Worker */ 548*86ee64e7SAndroid Build Coastguard Worker 549*86ee64e7SAndroid Build Coastguard Worker /* @(#) $Id$ */ 550*86ee64e7SAndroid Build Coastguard Worker- 551*86ee64e7SAndroid Build Coastguard Worker+#include <assert.h> 552*86ee64e7SAndroid Build Coastguard Worker #include "deflate.h" 553*86ee64e7SAndroid Build Coastguard Worker+#include "x86.h" 554*86ee64e7SAndroid Build Coastguard Worker 555*86ee64e7SAndroid Build Coastguard Worker const char deflate_copyright[] = 556*86ee64e7SAndroid Build Coastguard Worker " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; 557*86ee64e7SAndroid Build Coastguard Worker@@ -86,7 +87,7 @@ local block_state deflate_huff OF((deflate_state *s, int flush)); 558*86ee64e7SAndroid Build Coastguard Worker local void lm_init OF((deflate_state *s)); 559*86ee64e7SAndroid Build Coastguard Worker local void putShortMSB OF((deflate_state *s, uInt b)); 560*86ee64e7SAndroid Build Coastguard Worker local void flush_pending OF((z_streamp strm)); 561*86ee64e7SAndroid Build Coastguard Worker-local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); 562*86ee64e7SAndroid Build Coastguard Worker+unsigned ZLIB_INTERNAL deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); 563*86ee64e7SAndroid Build Coastguard Worker #ifdef ASMV 564*86ee64e7SAndroid Build Coastguard Worker # pragma message("Assembler code may have bugs -- use at your own risk") 565*86ee64e7SAndroid Build Coastguard Worker void match_init OF((void)); /* asm code initialization */ 566*86ee64e7SAndroid Build Coastguard Worker@@ -100,6 +101,20 @@ local void check_match OF((deflate_state *s, IPos start, IPos match, 567*86ee64e7SAndroid Build Coastguard Worker int length)); 568*86ee64e7SAndroid Build Coastguard Worker #endif 569*86ee64e7SAndroid Build Coastguard Worker 570*86ee64e7SAndroid Build Coastguard Worker+/* From crc32.c */ 571*86ee64e7SAndroid Build Coastguard Worker+extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); 572*86ee64e7SAndroid Build Coastguard Worker+extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); 573*86ee64e7SAndroid Build Coastguard Worker+extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); 574*86ee64e7SAndroid Build Coastguard Worker+ 575*86ee64e7SAndroid Build Coastguard Worker+#ifdef _MSC_VER 576*86ee64e7SAndroid Build Coastguard Worker+#define INLINE __inline 577*86ee64e7SAndroid Build Coastguard Worker+#else 578*86ee64e7SAndroid Build Coastguard Worker+#define INLINE inline 579*86ee64e7SAndroid Build Coastguard Worker+#endif 580*86ee64e7SAndroid Build Coastguard Worker+ 581*86ee64e7SAndroid Build Coastguard Worker+/* Inline optimisation */ 582*86ee64e7SAndroid Build Coastguard Worker+local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str); 583*86ee64e7SAndroid Build Coastguard Worker+ 584*86ee64e7SAndroid Build Coastguard Worker /* =========================================================================== 585*86ee64e7SAndroid Build Coastguard Worker * Local data 586*86ee64e7SAndroid Build Coastguard Worker */ 587*86ee64e7SAndroid Build Coastguard Worker@@ -162,7 +177,6 @@ local const config configuration_table[10] = { 588*86ee64e7SAndroid Build Coastguard Worker */ 589*86ee64e7SAndroid Build Coastguard Worker #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) 590*86ee64e7SAndroid Build Coastguard Worker 591*86ee64e7SAndroid Build Coastguard Worker- 592*86ee64e7SAndroid Build Coastguard Worker /* =========================================================================== 593*86ee64e7SAndroid Build Coastguard Worker * Insert string str in the dictionary and set match_head to the previous head 594*86ee64e7SAndroid Build Coastguard Worker * of the hash chain (the most recent string with same hash key). Return 595*86ee64e7SAndroid Build Coastguard Worker@@ -173,17 +187,28 @@ local const config configuration_table[10] = { 596*86ee64e7SAndroid Build Coastguard Worker * characters and the first MIN_MATCH bytes of str are valid (except for 597*86ee64e7SAndroid Build Coastguard Worker * the last MIN_MATCH-1 bytes of the input file). 598*86ee64e7SAndroid Build Coastguard Worker */ 599*86ee64e7SAndroid Build Coastguard Worker+local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) 600*86ee64e7SAndroid Build Coastguard Worker+{ 601*86ee64e7SAndroid Build Coastguard Worker+ Pos ret; 602*86ee64e7SAndroid Build Coastguard Worker+ 603*86ee64e7SAndroid Build Coastguard Worker+ UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); 604*86ee64e7SAndroid Build Coastguard Worker #ifdef FASTEST 605*86ee64e7SAndroid Build Coastguard Worker-#define INSERT_STRING(s, str, match_head) \ 606*86ee64e7SAndroid Build Coastguard Worker- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ 607*86ee64e7SAndroid Build Coastguard Worker- match_head = s->head[s->ins_h], \ 608*86ee64e7SAndroid Build Coastguard Worker- s->head[s->ins_h] = (Pos)(str)) 609*86ee64e7SAndroid Build Coastguard Worker+ ret = s->head[s->ins_h]; 610*86ee64e7SAndroid Build Coastguard Worker #else 611*86ee64e7SAndroid Build Coastguard Worker-#define INSERT_STRING(s, str, match_head) \ 612*86ee64e7SAndroid Build Coastguard Worker- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ 613*86ee64e7SAndroid Build Coastguard Worker- match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ 614*86ee64e7SAndroid Build Coastguard Worker- s->head[s->ins_h] = (Pos)(str)) 615*86ee64e7SAndroid Build Coastguard Worker+ ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; 616*86ee64e7SAndroid Build Coastguard Worker #endif 617*86ee64e7SAndroid Build Coastguard Worker+ s->head[s->ins_h] = str; 618*86ee64e7SAndroid Build Coastguard Worker+ 619*86ee64e7SAndroid Build Coastguard Worker+ return ret; 620*86ee64e7SAndroid Build Coastguard Worker+} 621*86ee64e7SAndroid Build Coastguard Worker+ 622*86ee64e7SAndroid Build Coastguard Worker+local INLINE Pos insert_string(deflate_state *const s, const Pos str) 623*86ee64e7SAndroid Build Coastguard Worker+{ 624*86ee64e7SAndroid Build Coastguard Worker+ if (x86_cpu_enable_simd) 625*86ee64e7SAndroid Build Coastguard Worker+ return insert_string_sse(s, str); 626*86ee64e7SAndroid Build Coastguard Worker+ return insert_string_c(s, str); 627*86ee64e7SAndroid Build Coastguard Worker+} 628*86ee64e7SAndroid Build Coastguard Worker+ 629*86ee64e7SAndroid Build Coastguard Worker 630*86ee64e7SAndroid Build Coastguard Worker /* =========================================================================== 631*86ee64e7SAndroid Build Coastguard Worker * Initialize the hash table (avoiding 64K overflow for 16 bit systems). 632*86ee64e7SAndroid Build Coastguard Worker@@ -248,6 +273,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, 633*86ee64e7SAndroid Build Coastguard Worker const char *version; 634*86ee64e7SAndroid Build Coastguard Worker int stream_size; 635*86ee64e7SAndroid Build Coastguard Worker { 636*86ee64e7SAndroid Build Coastguard Worker+ unsigned window_padding = 8; 637*86ee64e7SAndroid Build Coastguard Worker deflate_state *s; 638*86ee64e7SAndroid Build Coastguard Worker int wrap = 1; 639*86ee64e7SAndroid Build Coastguard Worker static const char my_version[] = ZLIB_VERSION; 640*86ee64e7SAndroid Build Coastguard Worker@@ -257,6 +283,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, 641*86ee64e7SAndroid Build Coastguard Worker * output size for (length,distance) codes is <= 24 bits. 642*86ee64e7SAndroid Build Coastguard Worker */ 643*86ee64e7SAndroid Build Coastguard Worker 644*86ee64e7SAndroid Build Coastguard Worker+ x86_check_features(); 645*86ee64e7SAndroid Build Coastguard Worker+ 646*86ee64e7SAndroid Build Coastguard Worker if (version == Z_NULL || version[0] != my_version[0] || 647*86ee64e7SAndroid Build Coastguard Worker stream_size != sizeof(z_stream)) { 648*86ee64e7SAndroid Build Coastguard Worker return Z_VERSION_ERROR; 649*86ee64e7SAndroid Build Coastguard Worker@@ -313,12 +341,19 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, 650*86ee64e7SAndroid Build Coastguard Worker s->w_size = 1 << s->w_bits; 651*86ee64e7SAndroid Build Coastguard Worker s->w_mask = s->w_size - 1; 652*86ee64e7SAndroid Build Coastguard Worker 653*86ee64e7SAndroid Build Coastguard Worker- s->hash_bits = (uInt)memLevel + 7; 654*86ee64e7SAndroid Build Coastguard Worker+ if (x86_cpu_enable_simd) { 655*86ee64e7SAndroid Build Coastguard Worker+ s->hash_bits = 15; 656*86ee64e7SAndroid Build Coastguard Worker+ } else { 657*86ee64e7SAndroid Build Coastguard Worker+ s->hash_bits = memLevel + 7; 658*86ee64e7SAndroid Build Coastguard Worker+ } 659*86ee64e7SAndroid Build Coastguard Worker+ 660*86ee64e7SAndroid Build Coastguard Worker s->hash_size = 1 << s->hash_bits; 661*86ee64e7SAndroid Build Coastguard Worker s->hash_mask = s->hash_size - 1; 662*86ee64e7SAndroid Build Coastguard Worker s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); 663*86ee64e7SAndroid Build Coastguard Worker 664*86ee64e7SAndroid Build Coastguard Worker- s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); 665*86ee64e7SAndroid Build Coastguard Worker+ s->window = (Bytef *) ZALLOC(strm, 666*86ee64e7SAndroid Build Coastguard Worker+ s->w_size + window_padding, 667*86ee64e7SAndroid Build Coastguard Worker+ 2*sizeof(Byte)); 668*86ee64e7SAndroid Build Coastguard Worker s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); 669*86ee64e7SAndroid Build Coastguard Worker s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); 670*86ee64e7SAndroid Build Coastguard Worker 671*86ee64e7SAndroid Build Coastguard Worker@@ -418,11 +453,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) 672*86ee64e7SAndroid Build Coastguard Worker str = s->strstart; 673*86ee64e7SAndroid Build Coastguard Worker n = s->lookahead - (MIN_MATCH-1); 674*86ee64e7SAndroid Build Coastguard Worker do { 675*86ee64e7SAndroid Build Coastguard Worker- UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); 676*86ee64e7SAndroid Build Coastguard Worker-#ifndef FASTEST 677*86ee64e7SAndroid Build Coastguard Worker- s->prev[str & s->w_mask] = s->head[s->ins_h]; 678*86ee64e7SAndroid Build Coastguard Worker-#endif 679*86ee64e7SAndroid Build Coastguard Worker- s->head[s->ins_h] = (Pos)str; 680*86ee64e7SAndroid Build Coastguard Worker+ insert_string(s, str); 681*86ee64e7SAndroid Build Coastguard Worker str++; 682*86ee64e7SAndroid Build Coastguard Worker } while (--n); 683*86ee64e7SAndroid Build Coastguard Worker s->strstart = str; 684*86ee64e7SAndroid Build Coastguard Worker@@ -848,7 +879,7 @@ int ZEXPORT deflate (strm, flush) 685*86ee64e7SAndroid Build Coastguard Worker #ifdef GZIP 686*86ee64e7SAndroid Build Coastguard Worker if (s->status == GZIP_STATE) { 687*86ee64e7SAndroid Build Coastguard Worker /* gzip header */ 688*86ee64e7SAndroid Build Coastguard Worker- strm->adler = crc32(0L, Z_NULL, 0); 689*86ee64e7SAndroid Build Coastguard Worker+ crc_reset(s); 690*86ee64e7SAndroid Build Coastguard Worker put_byte(s, 31); 691*86ee64e7SAndroid Build Coastguard Worker put_byte(s, 139); 692*86ee64e7SAndroid Build Coastguard Worker put_byte(s, 8); 693*86ee64e7SAndroid Build Coastguard Worker@@ -1049,6 +1080,7 @@ int ZEXPORT deflate (strm, flush) 694*86ee64e7SAndroid Build Coastguard Worker /* Write the trailer */ 695*86ee64e7SAndroid Build Coastguard Worker #ifdef GZIP 696*86ee64e7SAndroid Build Coastguard Worker if (s->wrap == 2) { 697*86ee64e7SAndroid Build Coastguard Worker+ crc_finalize(s); 698*86ee64e7SAndroid Build Coastguard Worker put_byte(s, (Byte)(strm->adler & 0xff)); 699*86ee64e7SAndroid Build Coastguard Worker put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); 700*86ee64e7SAndroid Build Coastguard Worker put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); 701*86ee64e7SAndroid Build Coastguard Worker@@ -1161,7 +1193,7 @@ int ZEXPORT deflateCopy (dest, source) 702*86ee64e7SAndroid Build Coastguard Worker * allocating a large strm->next_in buffer and copying from it. 703*86ee64e7SAndroid Build Coastguard Worker * (See also flush_pending()). 704*86ee64e7SAndroid Build Coastguard Worker */ 705*86ee64e7SAndroid Build Coastguard Worker-local unsigned read_buf(strm, buf, size) 706*86ee64e7SAndroid Build Coastguard Worker+ZLIB_INTERNAL unsigned deflate_read_buf(strm, buf, size) 707*86ee64e7SAndroid Build Coastguard Worker z_streamp strm; 708*86ee64e7SAndroid Build Coastguard Worker Bytef *buf; 709*86ee64e7SAndroid Build Coastguard Worker unsigned size; 710*86ee64e7SAndroid Build Coastguard Worker@@ -1173,15 +1205,16 @@ local unsigned read_buf(strm, buf, size) 711*86ee64e7SAndroid Build Coastguard Worker 712*86ee64e7SAndroid Build Coastguard Worker strm->avail_in -= len; 713*86ee64e7SAndroid Build Coastguard Worker 714*86ee64e7SAndroid Build Coastguard Worker- zmemcpy(buf, strm->next_in, len); 715*86ee64e7SAndroid Build Coastguard Worker- if (strm->state->wrap == 1) { 716*86ee64e7SAndroid Build Coastguard Worker- strm->adler = adler32(strm->adler, buf, len); 717*86ee64e7SAndroid Build Coastguard Worker- } 718*86ee64e7SAndroid Build Coastguard Worker #ifdef GZIP 719*86ee64e7SAndroid Build Coastguard Worker- else if (strm->state->wrap == 2) { 720*86ee64e7SAndroid Build Coastguard Worker- strm->adler = crc32(strm->adler, buf, len); 721*86ee64e7SAndroid Build Coastguard Worker- } 722*86ee64e7SAndroid Build Coastguard Worker+ if (strm->state->wrap == 2) 723*86ee64e7SAndroid Build Coastguard Worker+ copy_with_crc(strm, buf, len); 724*86ee64e7SAndroid Build Coastguard Worker+ else 725*86ee64e7SAndroid Build Coastguard Worker #endif 726*86ee64e7SAndroid Build Coastguard Worker+ { 727*86ee64e7SAndroid Build Coastguard Worker+ zmemcpy(buf, strm->next_in, len); 728*86ee64e7SAndroid Build Coastguard Worker+ if (strm->state->wrap == 1) 729*86ee64e7SAndroid Build Coastguard Worker+ strm->adler = adler32(strm->adler, buf, len); 730*86ee64e7SAndroid Build Coastguard Worker+ } 731*86ee64e7SAndroid Build Coastguard Worker strm->next_in += len; 732*86ee64e7SAndroid Build Coastguard Worker strm->total_in += len; 733*86ee64e7SAndroid Build Coastguard Worker 734*86ee64e7SAndroid Build Coastguard Worker@@ -1479,7 +1512,19 @@ local void check_match(s, start, match, length) 735*86ee64e7SAndroid Build Coastguard Worker * performed for at least two bytes (required for the zip translate_eol 736*86ee64e7SAndroid Build Coastguard Worker * option -- not supported here). 737*86ee64e7SAndroid Build Coastguard Worker */ 738*86ee64e7SAndroid Build Coastguard Worker-local void fill_window(s) 739*86ee64e7SAndroid Build Coastguard Worker+local void fill_window_c(deflate_state *s); 740*86ee64e7SAndroid Build Coastguard Worker+ 741*86ee64e7SAndroid Build Coastguard Worker+local void fill_window(deflate_state *s) 742*86ee64e7SAndroid Build Coastguard Worker+{ 743*86ee64e7SAndroid Build Coastguard Worker+ if (x86_cpu_enable_simd) { 744*86ee64e7SAndroid Build Coastguard Worker+ fill_window_sse(s); 745*86ee64e7SAndroid Build Coastguard Worker+ return; 746*86ee64e7SAndroid Build Coastguard Worker+ } 747*86ee64e7SAndroid Build Coastguard Worker+ 748*86ee64e7SAndroid Build Coastguard Worker+ fill_window_c(s); 749*86ee64e7SAndroid Build Coastguard Worker+} 750*86ee64e7SAndroid Build Coastguard Worker+ 751*86ee64e7SAndroid Build Coastguard Worker+local void fill_window_c(s) 752*86ee64e7SAndroid Build Coastguard Worker deflate_state *s; 753*86ee64e7SAndroid Build Coastguard Worker { 754*86ee64e7SAndroid Build Coastguard Worker unsigned n; 755*86ee64e7SAndroid Build Coastguard Worker@@ -1847,7 +1892,7 @@ local block_state deflate_fast(s, flush) 756*86ee64e7SAndroid Build Coastguard Worker */ 757*86ee64e7SAndroid Build Coastguard Worker hash_head = NIL; 758*86ee64e7SAndroid Build Coastguard Worker if (s->lookahead >= MIN_MATCH) { 759*86ee64e7SAndroid Build Coastguard Worker- INSERT_STRING(s, s->strstart, hash_head); 760*86ee64e7SAndroid Build Coastguard Worker+ hash_head = insert_string(s, s->strstart); 761*86ee64e7SAndroid Build Coastguard Worker } 762*86ee64e7SAndroid Build Coastguard Worker 763*86ee64e7SAndroid Build Coastguard Worker /* Find the longest match, discarding those <= prev_length. 764*86ee64e7SAndroid Build Coastguard Worker@@ -1878,7 +1923,7 @@ local block_state deflate_fast(s, flush) 765*86ee64e7SAndroid Build Coastguard Worker s->match_length--; /* string at strstart already in table */ 766*86ee64e7SAndroid Build Coastguard Worker do { 767*86ee64e7SAndroid Build Coastguard Worker s->strstart++; 768*86ee64e7SAndroid Build Coastguard Worker- INSERT_STRING(s, s->strstart, hash_head); 769*86ee64e7SAndroid Build Coastguard Worker+ hash_head = insert_string(s, s->strstart); 770*86ee64e7SAndroid Build Coastguard Worker /* strstart never exceeds WSIZE-MAX_MATCH, so there are 771*86ee64e7SAndroid Build Coastguard Worker * always MIN_MATCH bytes ahead. 772*86ee64e7SAndroid Build Coastguard Worker */ 773*86ee64e7SAndroid Build Coastguard Worker@@ -1950,7 +1995,7 @@ local block_state deflate_slow(s, flush) 774*86ee64e7SAndroid Build Coastguard Worker */ 775*86ee64e7SAndroid Build Coastguard Worker hash_head = NIL; 776*86ee64e7SAndroid Build Coastguard Worker if (s->lookahead >= MIN_MATCH) { 777*86ee64e7SAndroid Build Coastguard Worker- INSERT_STRING(s, s->strstart, hash_head); 778*86ee64e7SAndroid Build Coastguard Worker+ hash_head = insert_string(s, s->strstart); 779*86ee64e7SAndroid Build Coastguard Worker } 780*86ee64e7SAndroid Build Coastguard Worker 781*86ee64e7SAndroid Build Coastguard Worker /* Find the longest match, discarding those <= prev_length. 782*86ee64e7SAndroid Build Coastguard Worker@@ -2001,7 +2046,7 @@ local block_state deflate_slow(s, flush) 783*86ee64e7SAndroid Build Coastguard Worker s->prev_length -= 2; 784*86ee64e7SAndroid Build Coastguard Worker do { 785*86ee64e7SAndroid Build Coastguard Worker if (++s->strstart <= max_insert) { 786*86ee64e7SAndroid Build Coastguard Worker- INSERT_STRING(s, s->strstart, hash_head); 787*86ee64e7SAndroid Build Coastguard Worker+ hash_head = insert_string(s, s->strstart); 788*86ee64e7SAndroid Build Coastguard Worker } 789*86ee64e7SAndroid Build Coastguard Worker } while (--s->prev_length != 0); 790*86ee64e7SAndroid Build Coastguard Worker s->match_available = 0; 791*86ee64e7SAndroid Build Coastguard Worker@@ -2161,3 +2206,37 @@ local block_state deflate_huff(s, flush) 792*86ee64e7SAndroid Build Coastguard Worker FLUSH_BLOCK(s, 0); 793*86ee64e7SAndroid Build Coastguard Worker return block_done; 794*86ee64e7SAndroid Build Coastguard Worker } 795*86ee64e7SAndroid Build Coastguard Worker+ 796*86ee64e7SAndroid Build Coastguard Worker+/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will 797*86ee64e7SAndroid Build Coastguard Worker+ * use intrinsic without extra params 798*86ee64e7SAndroid Build Coastguard Worker+ */ 799*86ee64e7SAndroid Build Coastguard Worker+local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str) 800*86ee64e7SAndroid Build Coastguard Worker+{ 801*86ee64e7SAndroid Build Coastguard Worker+ Pos ret; 802*86ee64e7SAndroid Build Coastguard Worker+ unsigned *ip, val, h = 0; 803*86ee64e7SAndroid Build Coastguard Worker+ 804*86ee64e7SAndroid Build Coastguard Worker+ ip = (unsigned *)&s->window[str]; 805*86ee64e7SAndroid Build Coastguard Worker+ val = *ip; 806*86ee64e7SAndroid Build Coastguard Worker+ 807*86ee64e7SAndroid Build Coastguard Worker+ if (s->level >= 6) 808*86ee64e7SAndroid Build Coastguard Worker+ val &= 0xFFFFFF; 809*86ee64e7SAndroid Build Coastguard Worker+ 810*86ee64e7SAndroid Build Coastguard Worker+/* Windows clang should use inline asm */ 811*86ee64e7SAndroid Build Coastguard Worker+#if defined(_MSC_VER) && !defined(__clang__) 812*86ee64e7SAndroid Build Coastguard Worker+ h = _mm_crc32_u32(h, val); 813*86ee64e7SAndroid Build Coastguard Worker+#elif defined(__i386__) || defined(__amd64__) 814*86ee64e7SAndroid Build Coastguard Worker+ __asm__ __volatile__ ( 815*86ee64e7SAndroid Build Coastguard Worker+ "crc32 %1,%0\n\t" 816*86ee64e7SAndroid Build Coastguard Worker+ : "+r" (h) 817*86ee64e7SAndroid Build Coastguard Worker+ : "r" (val) 818*86ee64e7SAndroid Build Coastguard Worker+ ); 819*86ee64e7SAndroid Build Coastguard Worker+#else 820*86ee64e7SAndroid Build Coastguard Worker+ /* This should never happen */ 821*86ee64e7SAndroid Build Coastguard Worker+ assert(0); 822*86ee64e7SAndroid Build Coastguard Worker+#endif 823*86ee64e7SAndroid Build Coastguard Worker+ 824*86ee64e7SAndroid Build Coastguard Worker+ ret = s->head[h & s->hash_mask]; 825*86ee64e7SAndroid Build Coastguard Worker+ s->head[h & s->hash_mask] = str; 826*86ee64e7SAndroid Build Coastguard Worker+ s->prev[str & s->w_mask] = ret; 827*86ee64e7SAndroid Build Coastguard Worker+ return ret; 828*86ee64e7SAndroid Build Coastguard Worker+} 829*86ee64e7SAndroid Build Coastguard Workerdiff --git a/deflate.h b/deflate.h 830*86ee64e7SAndroid Build Coastguard Workerindex 23ecdd312bc0..ab56df7663b6 100644 831*86ee64e7SAndroid Build Coastguard Worker--- a/deflate.h 832*86ee64e7SAndroid Build Coastguard Worker+++ b/deflate.h 833*86ee64e7SAndroid Build Coastguard Worker@@ -109,7 +109,7 @@ typedef struct internal_state { 834*86ee64e7SAndroid Build Coastguard Worker ulg gzindex; /* where in extra, name, or comment */ 835*86ee64e7SAndroid Build Coastguard Worker Byte method; /* can only be DEFLATED */ 836*86ee64e7SAndroid Build Coastguard Worker int last_flush; /* value of flush param for previous deflate call */ 837*86ee64e7SAndroid Build Coastguard Worker- 838*86ee64e7SAndroid Build Coastguard Worker+ unsigned zalign(16) crc0[4 * 5]; 839*86ee64e7SAndroid Build Coastguard Worker /* used by deflate.c: */ 840*86ee64e7SAndroid Build Coastguard Worker 841*86ee64e7SAndroid Build Coastguard Worker uInt w_size; /* LZ77 window size (32K by default) */ 842*86ee64e7SAndroid Build Coastguard Worker@@ -346,4 +346,14 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, 843*86ee64e7SAndroid Build Coastguard Worker flush = _tr_tally(s, distance, length) 844*86ee64e7SAndroid Build Coastguard Worker #endif 845*86ee64e7SAndroid Build Coastguard Worker 846*86ee64e7SAndroid Build Coastguard Worker+/* Functions that are SIMD optimised on x86 */ 847*86ee64e7SAndroid Build Coastguard Worker+void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); 848*86ee64e7SAndroid Build Coastguard Worker+void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, 849*86ee64e7SAndroid Build Coastguard Worker+ unsigned char* dst, 850*86ee64e7SAndroid Build Coastguard Worker+ const unsigned char* src, 851*86ee64e7SAndroid Build Coastguard Worker+ long len); 852*86ee64e7SAndroid Build Coastguard Worker+unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); 853*86ee64e7SAndroid Build Coastguard Worker+ 854*86ee64e7SAndroid Build Coastguard Worker+void ZLIB_INTERNAL fill_window_sse(deflate_state* s); 855*86ee64e7SAndroid Build Coastguard Worker+ 856*86ee64e7SAndroid Build Coastguard Worker #endif /* DEFLATE_H */ 857*86ee64e7SAndroid Build Coastguard Workerdiff --git a/fill_window_sse.c b/fill_window_sse.c 858*86ee64e7SAndroid Build Coastguard Workernew file mode 100644 859*86ee64e7SAndroid Build Coastguard Workerindex 000000000000..949ccce1ba9c 860*86ee64e7SAndroid Build Coastguard Worker--- /dev/null 861*86ee64e7SAndroid Build Coastguard Worker+++ b/fill_window_sse.c 862*86ee64e7SAndroid Build Coastguard Worker@@ -0,0 +1,177 @@ 863*86ee64e7SAndroid Build Coastguard Worker+/* 864*86ee64e7SAndroid Build Coastguard Worker+ * Fill Window with SSE2-optimized hash shifting 865*86ee64e7SAndroid Build Coastguard Worker+ * 866*86ee64e7SAndroid Build Coastguard Worker+ * Copyright (C) 2013 Intel Corporation 867*86ee64e7SAndroid Build Coastguard Worker+ * Authors: 868*86ee64e7SAndroid Build Coastguard Worker+ * Arjan van de Ven <[email protected]> 869*86ee64e7SAndroid Build Coastguard Worker+ * Jim Kukunas <[email protected]> 870*86ee64e7SAndroid Build Coastguard Worker+ * 871*86ee64e7SAndroid Build Coastguard Worker+ * For conditions of distribution and use, see copyright notice in zlib.h 872*86ee64e7SAndroid Build Coastguard Worker+ */ 873*86ee64e7SAndroid Build Coastguard Worker+ 874*86ee64e7SAndroid Build Coastguard Worker+#include <immintrin.h> 875*86ee64e7SAndroid Build Coastguard Worker+#include "deflate.h" 876*86ee64e7SAndroid Build Coastguard Worker+ 877*86ee64e7SAndroid Build Coastguard Worker+#define UPDATE_HASH(s,h,i) \ 878*86ee64e7SAndroid Build Coastguard Worker+ {\ 879*86ee64e7SAndroid Build Coastguard Worker+ if (s->level < 6) { \ 880*86ee64e7SAndroid Build Coastguard Worker+ h = (3483 * (s->window[i]) +\ 881*86ee64e7SAndroid Build Coastguard Worker+ 23081* (s->window[i+1]) +\ 882*86ee64e7SAndroid Build Coastguard Worker+ 6954 * (s->window[i+2]) +\ 883*86ee64e7SAndroid Build Coastguard Worker+ 20947* (s->window[i+3])) & s->hash_mask;\ 884*86ee64e7SAndroid Build Coastguard Worker+ } else {\ 885*86ee64e7SAndroid Build Coastguard Worker+ h = (25881* (s->window[i]) +\ 886*86ee64e7SAndroid Build Coastguard Worker+ 24674* (s->window[i+1]) +\ 887*86ee64e7SAndroid Build Coastguard Worker+ 25811* (s->window[i+2])) & s->hash_mask;\ 888*86ee64e7SAndroid Build Coastguard Worker+ }\ 889*86ee64e7SAndroid Build Coastguard Worker+ }\ 890*86ee64e7SAndroid Build Coastguard Worker+ 891*86ee64e7SAndroid Build Coastguard Worker+extern int deflate_read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); 892*86ee64e7SAndroid Build Coastguard Worker+ 893*86ee64e7SAndroid Build Coastguard Worker+void fill_window_sse(deflate_state *s) 894*86ee64e7SAndroid Build Coastguard Worker+{ 895*86ee64e7SAndroid Build Coastguard Worker+ const __m128i xmm_wsize = _mm_set1_epi16(s->w_size); 896*86ee64e7SAndroid Build Coastguard Worker+ 897*86ee64e7SAndroid Build Coastguard Worker+ register unsigned n; 898*86ee64e7SAndroid Build Coastguard Worker+ register Posf *p; 899*86ee64e7SAndroid Build Coastguard Worker+ unsigned more; /* Amount of free space at the end of the window. */ 900*86ee64e7SAndroid Build Coastguard Worker+ uInt wsize = s->w_size; 901*86ee64e7SAndroid Build Coastguard Worker+ 902*86ee64e7SAndroid Build Coastguard Worker+ Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); 903*86ee64e7SAndroid Build Coastguard Worker+ 904*86ee64e7SAndroid Build Coastguard Worker+ do { 905*86ee64e7SAndroid Build Coastguard Worker+ more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); 906*86ee64e7SAndroid Build Coastguard Worker+ 907*86ee64e7SAndroid Build Coastguard Worker+ /* Deal with !@#$% 64K limit: */ 908*86ee64e7SAndroid Build Coastguard Worker+ if (sizeof(int) <= 2) { 909*86ee64e7SAndroid Build Coastguard Worker+ if (more == 0 && s->strstart == 0 && s->lookahead == 0) { 910*86ee64e7SAndroid Build Coastguard Worker+ more = wsize; 911*86ee64e7SAndroid Build Coastguard Worker+ 912*86ee64e7SAndroid Build Coastguard Worker+ } else if (more == (unsigned)(-1)) { 913*86ee64e7SAndroid Build Coastguard Worker+ /* Very unlikely, but possible on 16 bit machine if 914*86ee64e7SAndroid Build Coastguard Worker+ * strstart == 0 && lookahead == 1 (input done a byte at time) 915*86ee64e7SAndroid Build Coastguard Worker+ */ 916*86ee64e7SAndroid Build Coastguard Worker+ more--; 917*86ee64e7SAndroid Build Coastguard Worker+ } 918*86ee64e7SAndroid Build Coastguard Worker+ } 919*86ee64e7SAndroid Build Coastguard Worker+ 920*86ee64e7SAndroid Build Coastguard Worker+ /* If the window is almost full and there is insufficient lookahead, 921*86ee64e7SAndroid Build Coastguard Worker+ * move the upper half to the lower one to make room in the upper half. 922*86ee64e7SAndroid Build Coastguard Worker+ */ 923*86ee64e7SAndroid Build Coastguard Worker+ if (s->strstart >= wsize+MAX_DIST(s)) { 924*86ee64e7SAndroid Build Coastguard Worker+ 925*86ee64e7SAndroid Build Coastguard Worker+ zmemcpy(s->window, s->window+wsize, (unsigned)wsize); 926*86ee64e7SAndroid Build Coastguard Worker+ s->match_start -= wsize; 927*86ee64e7SAndroid Build Coastguard Worker+ s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ 928*86ee64e7SAndroid Build Coastguard Worker+ s->block_start -= (long) wsize; 929*86ee64e7SAndroid Build Coastguard Worker+ 930*86ee64e7SAndroid Build Coastguard Worker+ /* Slide the hash table (could be avoided with 32 bit values 931*86ee64e7SAndroid Build Coastguard Worker+ at the expense of memory usage). We slide even when level == 0 932*86ee64e7SAndroid Build Coastguard Worker+ to keep the hash table consistent if we switch back to level > 0 933*86ee64e7SAndroid Build Coastguard Worker+ later. (Using level 0 permanently is not an optimal usage of 934*86ee64e7SAndroid Build Coastguard Worker+ zlib, so we don't care about this pathological case.) 935*86ee64e7SAndroid Build Coastguard Worker+ */ 936*86ee64e7SAndroid Build Coastguard Worker+ n = s->hash_size; 937*86ee64e7SAndroid Build Coastguard Worker+ p = &s->head[n]; 938*86ee64e7SAndroid Build Coastguard Worker+ p -= 8; 939*86ee64e7SAndroid Build Coastguard Worker+ do { 940*86ee64e7SAndroid Build Coastguard Worker+ __m128i value, result; 941*86ee64e7SAndroid Build Coastguard Worker+ 942*86ee64e7SAndroid Build Coastguard Worker+ value = _mm_loadu_si128((__m128i *)p); 943*86ee64e7SAndroid Build Coastguard Worker+ result = _mm_subs_epu16(value, xmm_wsize); 944*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)p, result); 945*86ee64e7SAndroid Build Coastguard Worker+ 946*86ee64e7SAndroid Build Coastguard Worker+ p -= 8; 947*86ee64e7SAndroid Build Coastguard Worker+ n -= 8; 948*86ee64e7SAndroid Build Coastguard Worker+ } while (n > 0); 949*86ee64e7SAndroid Build Coastguard Worker+ 950*86ee64e7SAndroid Build Coastguard Worker+ n = wsize; 951*86ee64e7SAndroid Build Coastguard Worker+#ifndef FASTEST 952*86ee64e7SAndroid Build Coastguard Worker+ p = &s->prev[n]; 953*86ee64e7SAndroid Build Coastguard Worker+ p -= 8; 954*86ee64e7SAndroid Build Coastguard Worker+ do { 955*86ee64e7SAndroid Build Coastguard Worker+ __m128i value, result; 956*86ee64e7SAndroid Build Coastguard Worker+ 957*86ee64e7SAndroid Build Coastguard Worker+ value = _mm_loadu_si128((__m128i *)p); 958*86ee64e7SAndroid Build Coastguard Worker+ result = _mm_subs_epu16(value, xmm_wsize); 959*86ee64e7SAndroid Build Coastguard Worker+ _mm_storeu_si128((__m128i *)p, result); 960*86ee64e7SAndroid Build Coastguard Worker+ 961*86ee64e7SAndroid Build Coastguard Worker+ p -= 8; 962*86ee64e7SAndroid Build Coastguard Worker+ n -= 8; 963*86ee64e7SAndroid Build Coastguard Worker+ } while (n > 0); 964*86ee64e7SAndroid Build Coastguard Worker+#endif 965*86ee64e7SAndroid Build Coastguard Worker+ more += wsize; 966*86ee64e7SAndroid Build Coastguard Worker+ } 967*86ee64e7SAndroid Build Coastguard Worker+ if (s->strm->avail_in == 0) break; 968*86ee64e7SAndroid Build Coastguard Worker+ 969*86ee64e7SAndroid Build Coastguard Worker+ /* If there was no sliding: 970*86ee64e7SAndroid Build Coastguard Worker+ * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && 971*86ee64e7SAndroid Build Coastguard Worker+ * more == window_size - lookahead - strstart 972*86ee64e7SAndroid Build Coastguard Worker+ * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) 973*86ee64e7SAndroid Build Coastguard Worker+ * => more >= window_size - 2*WSIZE + 2 974*86ee64e7SAndroid Build Coastguard Worker+ * In the BIG_MEM or MMAP case (not yet supported), 975*86ee64e7SAndroid Build Coastguard Worker+ * window_size == input_size + MIN_LOOKAHEAD && 976*86ee64e7SAndroid Build Coastguard Worker+ * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. 977*86ee64e7SAndroid Build Coastguard Worker+ * Otherwise, window_size == 2*WSIZE so more >= 2. 978*86ee64e7SAndroid Build Coastguard Worker+ * If there was sliding, more >= WSIZE. So in all cases, more >= 2. 979*86ee64e7SAndroid Build Coastguard Worker+ */ 980*86ee64e7SAndroid Build Coastguard Worker+ Assert(more >= 2, "more < 2"); 981*86ee64e7SAndroid Build Coastguard Worker+ 982*86ee64e7SAndroid Build Coastguard Worker+ n = deflate_read_buf(s->strm, 983*86ee64e7SAndroid Build Coastguard Worker+ s->window + s->strstart + s->lookahead, 984*86ee64e7SAndroid Build Coastguard Worker+ more); 985*86ee64e7SAndroid Build Coastguard Worker+ s->lookahead += n; 986*86ee64e7SAndroid Build Coastguard Worker+ 987*86ee64e7SAndroid Build Coastguard Worker+ /* Initialize the hash value now that we have some input: */ 988*86ee64e7SAndroid Build Coastguard Worker+ if (s->lookahead >= MIN_MATCH) { 989*86ee64e7SAndroid Build Coastguard Worker+ uInt str = s->strstart; 990*86ee64e7SAndroid Build Coastguard Worker+ s->ins_h = s->window[str]; 991*86ee64e7SAndroid Build Coastguard Worker+ if (str >= 1) 992*86ee64e7SAndroid Build Coastguard Worker+ UPDATE_HASH(s, s->ins_h, str + 1 - (MIN_MATCH-1)); 993*86ee64e7SAndroid Build Coastguard Worker+#if MIN_MATCH != 3 994*86ee64e7SAndroid Build Coastguard Worker+ Call UPDATE_HASH() MIN_MATCH-3 more times 995*86ee64e7SAndroid Build Coastguard Worker+#endif 996*86ee64e7SAndroid Build Coastguard Worker+ } 997*86ee64e7SAndroid Build Coastguard Worker+ /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, 998*86ee64e7SAndroid Build Coastguard Worker+ * but this is not important since only literal bytes will be emitted. 999*86ee64e7SAndroid Build Coastguard Worker+ */ 1000*86ee64e7SAndroid Build Coastguard Worker+ 1001*86ee64e7SAndroid Build Coastguard Worker+ } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); 1002*86ee64e7SAndroid Build Coastguard Worker+ 1003*86ee64e7SAndroid Build Coastguard Worker+ /* If the WIN_INIT bytes after the end of the current data have never been 1004*86ee64e7SAndroid Build Coastguard Worker+ * written, then zero those bytes in order to avoid memory check reports of 1005*86ee64e7SAndroid Build Coastguard Worker+ * the use of uninitialized (or uninitialised as Julian writes) bytes by 1006*86ee64e7SAndroid Build Coastguard Worker+ * the longest match routines. Update the high water mark for the next 1007*86ee64e7SAndroid Build Coastguard Worker+ * time through here. WIN_INIT is set to MAX_MATCH since the longest match 1008*86ee64e7SAndroid Build Coastguard Worker+ * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. 1009*86ee64e7SAndroid Build Coastguard Worker+ */ 1010*86ee64e7SAndroid Build Coastguard Worker+ if (s->high_water < s->window_size) { 1011*86ee64e7SAndroid Build Coastguard Worker+ ulg curr = s->strstart + (ulg)(s->lookahead); 1012*86ee64e7SAndroid Build Coastguard Worker+ ulg init; 1013*86ee64e7SAndroid Build Coastguard Worker+ 1014*86ee64e7SAndroid Build Coastguard Worker+ if (s->high_water < curr) { 1015*86ee64e7SAndroid Build Coastguard Worker+ /* Previous high water mark below current data -- zero WIN_INIT 1016*86ee64e7SAndroid Build Coastguard Worker+ * bytes or up to end of window, whichever is less. 1017*86ee64e7SAndroid Build Coastguard Worker+ */ 1018*86ee64e7SAndroid Build Coastguard Worker+ init = s->window_size - curr; 1019*86ee64e7SAndroid Build Coastguard Worker+ if (init > WIN_INIT) 1020*86ee64e7SAndroid Build Coastguard Worker+ init = WIN_INIT; 1021*86ee64e7SAndroid Build Coastguard Worker+ zmemzero(s->window + curr, (unsigned)init); 1022*86ee64e7SAndroid Build Coastguard Worker+ s->high_water = curr + init; 1023*86ee64e7SAndroid Build Coastguard Worker+ } 1024*86ee64e7SAndroid Build Coastguard Worker+ else if (s->high_water < (ulg)curr + WIN_INIT) { 1025*86ee64e7SAndroid Build Coastguard Worker+ /* High water mark at or above current data, but below current data 1026*86ee64e7SAndroid Build Coastguard Worker+ * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up 1027*86ee64e7SAndroid Build Coastguard Worker+ * to end of window, whichever is less. 1028*86ee64e7SAndroid Build Coastguard Worker+ */ 1029*86ee64e7SAndroid Build Coastguard Worker+ init = (ulg)curr + WIN_INIT - s->high_water; 1030*86ee64e7SAndroid Build Coastguard Worker+ if (init > s->window_size - s->high_water) 1031*86ee64e7SAndroid Build Coastguard Worker+ init = s->window_size - s->high_water; 1032*86ee64e7SAndroid Build Coastguard Worker+ zmemzero(s->window + s->high_water, (unsigned)init); 1033*86ee64e7SAndroid Build Coastguard Worker+ s->high_water += init; 1034*86ee64e7SAndroid Build Coastguard Worker+ } 1035*86ee64e7SAndroid Build Coastguard Worker+ } 1036*86ee64e7SAndroid Build Coastguard Worker+ 1037*86ee64e7SAndroid Build Coastguard Worker+ Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, 1038*86ee64e7SAndroid Build Coastguard Worker+ "not enough room for search"); 1039*86ee64e7SAndroid Build Coastguard Worker+} 1040*86ee64e7SAndroid Build Coastguard Workerdiff --git a/simd_stub.c b/simd_stub.c 1041*86ee64e7SAndroid Build Coastguard Workernew file mode 100644 1042*86ee64e7SAndroid Build Coastguard Workerindex 000000000000..c6d46051498f 1043*86ee64e7SAndroid Build Coastguard Worker--- /dev/null 1044*86ee64e7SAndroid Build Coastguard Worker+++ b/simd_stub.c 1045*86ee64e7SAndroid Build Coastguard Worker@@ -0,0 +1,35 @@ 1046*86ee64e7SAndroid Build Coastguard Worker+/* simd_stub.c -- stub implementations 1047*86ee64e7SAndroid Build Coastguard Worker+* Copyright (C) 2014 Intel Corporation 1048*86ee64e7SAndroid Build Coastguard Worker+* For conditions of distribution and use, see copyright notice in zlib.h 1049*86ee64e7SAndroid Build Coastguard Worker+*/ 1050*86ee64e7SAndroid Build Coastguard Worker+#include <assert.h> 1051*86ee64e7SAndroid Build Coastguard Worker+ 1052*86ee64e7SAndroid Build Coastguard Worker+#include "deflate.h" 1053*86ee64e7SAndroid Build Coastguard Worker+#include "x86.h" 1054*86ee64e7SAndroid Build Coastguard Worker+ 1055*86ee64e7SAndroid Build Coastguard Worker+int ZLIB_INTERNAL x86_cpu_enable_simd = 0; 1056*86ee64e7SAndroid Build Coastguard Worker+ 1057*86ee64e7SAndroid Build Coastguard Worker+void ZLIB_INTERNAL crc_fold_init(deflate_state *const s) { 1058*86ee64e7SAndroid Build Coastguard Worker+ assert(0); 1059*86ee64e7SAndroid Build Coastguard Worker+} 1060*86ee64e7SAndroid Build Coastguard Worker+ 1061*86ee64e7SAndroid Build Coastguard Worker+void ZLIB_INTERNAL crc_fold_copy(deflate_state *const s, 1062*86ee64e7SAndroid Build Coastguard Worker+ unsigned char *dst, 1063*86ee64e7SAndroid Build Coastguard Worker+ const unsigned char *src, 1064*86ee64e7SAndroid Build Coastguard Worker+ long len) { 1065*86ee64e7SAndroid Build Coastguard Worker+ assert(0); 1066*86ee64e7SAndroid Build Coastguard Worker+} 1067*86ee64e7SAndroid Build Coastguard Worker+ 1068*86ee64e7SAndroid Build Coastguard Worker+unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) { 1069*86ee64e7SAndroid Build Coastguard Worker+ assert(0); 1070*86ee64e7SAndroid Build Coastguard Worker+ return 0; 1071*86ee64e7SAndroid Build Coastguard Worker+} 1072*86ee64e7SAndroid Build Coastguard Worker+ 1073*86ee64e7SAndroid Build Coastguard Worker+void ZLIB_INTERNAL fill_window_sse(deflate_state *s) 1074*86ee64e7SAndroid Build Coastguard Worker+{ 1075*86ee64e7SAndroid Build Coastguard Worker+ assert(0); 1076*86ee64e7SAndroid Build Coastguard Worker+} 1077*86ee64e7SAndroid Build Coastguard Worker+ 1078*86ee64e7SAndroid Build Coastguard Worker+void x86_check_features(void) 1079*86ee64e7SAndroid Build Coastguard Worker+{ 1080*86ee64e7SAndroid Build Coastguard Worker+} 1081*86ee64e7SAndroid Build Coastguard Workerdiff --git a/x86.c b/x86.c 1082*86ee64e7SAndroid Build Coastguard Workernew file mode 100644 1083*86ee64e7SAndroid Build Coastguard Workerindex 000000000000..e56fe8b85a39 1084*86ee64e7SAndroid Build Coastguard Worker--- /dev/null 1085*86ee64e7SAndroid Build Coastguard Worker+++ b/x86.c 1086*86ee64e7SAndroid Build Coastguard Worker@@ -0,0 +1,92 @@ 1087*86ee64e7SAndroid Build Coastguard Worker+/* 1088*86ee64e7SAndroid Build Coastguard Worker+ * x86 feature check 1089*86ee64e7SAndroid Build Coastguard Worker+ * 1090*86ee64e7SAndroid Build Coastguard Worker+ * Copyright (C) 2013 Intel Corporation. All rights reserved. 1091*86ee64e7SAndroid Build Coastguard Worker+ * Author: 1092*86ee64e7SAndroid Build Coastguard Worker+ * Jim Kukunas 1093*86ee64e7SAndroid Build Coastguard Worker+ * 1094*86ee64e7SAndroid Build Coastguard Worker+ * For conditions of distribution and use, see copyright notice in zlib.h 1095*86ee64e7SAndroid Build Coastguard Worker+ */ 1096*86ee64e7SAndroid Build Coastguard Worker+ 1097*86ee64e7SAndroid Build Coastguard Worker+#include "x86.h" 1098*86ee64e7SAndroid Build Coastguard Worker+#include "zutil.h" 1099*86ee64e7SAndroid Build Coastguard Worker+ 1100*86ee64e7SAndroid Build Coastguard Worker+int ZLIB_INTERNAL x86_cpu_enable_simd = 0; 1101*86ee64e7SAndroid Build Coastguard Worker+ 1102*86ee64e7SAndroid Build Coastguard Worker+#ifndef _MSC_VER 1103*86ee64e7SAndroid Build Coastguard Worker+#include <pthread.h> 1104*86ee64e7SAndroid Build Coastguard Worker+ 1105*86ee64e7SAndroid Build Coastguard Worker+pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT; 1106*86ee64e7SAndroid Build Coastguard Worker+static void _x86_check_features(void); 1107*86ee64e7SAndroid Build Coastguard Worker+ 1108*86ee64e7SAndroid Build Coastguard Worker+void x86_check_features(void) 1109*86ee64e7SAndroid Build Coastguard Worker+{ 1110*86ee64e7SAndroid Build Coastguard Worker+ pthread_once(&cpu_check_inited_once, _x86_check_features); 1111*86ee64e7SAndroid Build Coastguard Worker+} 1112*86ee64e7SAndroid Build Coastguard Worker+ 1113*86ee64e7SAndroid Build Coastguard Worker+static void _x86_check_features(void) 1114*86ee64e7SAndroid Build Coastguard Worker+{ 1115*86ee64e7SAndroid Build Coastguard Worker+ int x86_cpu_has_sse2; 1116*86ee64e7SAndroid Build Coastguard Worker+ int x86_cpu_has_sse42; 1117*86ee64e7SAndroid Build Coastguard Worker+ int x86_cpu_has_pclmulqdq; 1118*86ee64e7SAndroid Build Coastguard Worker+ unsigned eax, ebx, ecx, edx; 1119*86ee64e7SAndroid Build Coastguard Worker+ 1120*86ee64e7SAndroid Build Coastguard Worker+ eax = 1; 1121*86ee64e7SAndroid Build Coastguard Worker+#ifdef __i386__ 1122*86ee64e7SAndroid Build Coastguard Worker+ __asm__ __volatile__ ( 1123*86ee64e7SAndroid Build Coastguard Worker+ "xchg %%ebx, %1\n\t" 1124*86ee64e7SAndroid Build Coastguard Worker+ "cpuid\n\t" 1125*86ee64e7SAndroid Build Coastguard Worker+ "xchg %1, %%ebx\n\t" 1126*86ee64e7SAndroid Build Coastguard Worker+ : "+a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) 1127*86ee64e7SAndroid Build Coastguard Worker+ ); 1128*86ee64e7SAndroid Build Coastguard Worker+#else 1129*86ee64e7SAndroid Build Coastguard Worker+ __asm__ __volatile__ ( 1130*86ee64e7SAndroid Build Coastguard Worker+ "cpuid\n\t" 1131*86ee64e7SAndroid Build Coastguard Worker+ : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) 1132*86ee64e7SAndroid Build Coastguard Worker+ ); 1133*86ee64e7SAndroid Build Coastguard Worker+#endif /* (__i386__) */ 1134*86ee64e7SAndroid Build Coastguard Worker+ 1135*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_sse2 = edx & 0x4000000; 1136*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_sse42 = ecx & 0x100000; 1137*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_pclmulqdq = ecx & 0x2; 1138*86ee64e7SAndroid Build Coastguard Worker+ 1139*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_enable_simd = x86_cpu_has_sse2 && 1140*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_sse42 && 1141*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_pclmulqdq; 1142*86ee64e7SAndroid Build Coastguard Worker+} 1143*86ee64e7SAndroid Build Coastguard Worker+#else 1144*86ee64e7SAndroid Build Coastguard Worker+#include <intrin.h> 1145*86ee64e7SAndroid Build Coastguard Worker+#include <windows.h> 1146*86ee64e7SAndroid Build Coastguard Worker+ 1147*86ee64e7SAndroid Build Coastguard Worker+static BOOL CALLBACK _x86_check_features(PINIT_ONCE once, 1148*86ee64e7SAndroid Build Coastguard Worker+ PVOID param, 1149*86ee64e7SAndroid Build Coastguard Worker+ PVOID *context); 1150*86ee64e7SAndroid Build Coastguard Worker+static INIT_ONCE cpu_check_inited_once = INIT_ONCE_STATIC_INIT; 1151*86ee64e7SAndroid Build Coastguard Worker+ 1152*86ee64e7SAndroid Build Coastguard Worker+void x86_check_features(void) 1153*86ee64e7SAndroid Build Coastguard Worker+{ 1154*86ee64e7SAndroid Build Coastguard Worker+ InitOnceExecuteOnce(&cpu_check_inited_once, _x86_check_features, 1155*86ee64e7SAndroid Build Coastguard Worker+ NULL, NULL); 1156*86ee64e7SAndroid Build Coastguard Worker+} 1157*86ee64e7SAndroid Build Coastguard Worker+ 1158*86ee64e7SAndroid Build Coastguard Worker+static BOOL CALLBACK _x86_check_features(PINIT_ONCE once, 1159*86ee64e7SAndroid Build Coastguard Worker+ PVOID param, 1160*86ee64e7SAndroid Build Coastguard Worker+ PVOID *context) 1161*86ee64e7SAndroid Build Coastguard Worker+{ 1162*86ee64e7SAndroid Build Coastguard Worker+ int x86_cpu_has_sse2; 1163*86ee64e7SAndroid Build Coastguard Worker+ int x86_cpu_has_sse42; 1164*86ee64e7SAndroid Build Coastguard Worker+ int x86_cpu_has_pclmulqdq; 1165*86ee64e7SAndroid Build Coastguard Worker+ int regs[4]; 1166*86ee64e7SAndroid Build Coastguard Worker+ 1167*86ee64e7SAndroid Build Coastguard Worker+ __cpuid(regs, 1); 1168*86ee64e7SAndroid Build Coastguard Worker+ 1169*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_sse2 = regs[3] & 0x4000000; 1170*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_sse42= regs[2] & 0x100000; 1171*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_pclmulqdq = regs[2] & 0x2; 1172*86ee64e7SAndroid Build Coastguard Worker+ 1173*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_enable_simd = x86_cpu_has_sse2 && 1174*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_sse42 && 1175*86ee64e7SAndroid Build Coastguard Worker+ x86_cpu_has_pclmulqdq; 1176*86ee64e7SAndroid Build Coastguard Worker+ return TRUE; 1177*86ee64e7SAndroid Build Coastguard Worker+} 1178*86ee64e7SAndroid Build Coastguard Worker+#endif /* _MSC_VER */ 1179*86ee64e7SAndroid Build Coastguard Workerdiff --git a/x86.h b/x86.h 1180*86ee64e7SAndroid Build Coastguard Workernew file mode 100644 1181*86ee64e7SAndroid Build Coastguard Workerindex 000000000000..ebcf10ab09d2 1182*86ee64e7SAndroid Build Coastguard Worker--- /dev/null 1183*86ee64e7SAndroid Build Coastguard Worker+++ b/x86.h 1184*86ee64e7SAndroid Build Coastguard Worker@@ -0,0 +1,15 @@ 1185*86ee64e7SAndroid Build Coastguard Worker+/* x86.h -- check for x86 CPU features 1186*86ee64e7SAndroid Build Coastguard Worker+* Copyright (C) 2013 Intel Corporation Jim Kukunas 1187*86ee64e7SAndroid Build Coastguard Worker+* For conditions of distribution and use, see copyright notice in zlib.h 1188*86ee64e7SAndroid Build Coastguard Worker+*/ 1189*86ee64e7SAndroid Build Coastguard Worker+ 1190*86ee64e7SAndroid Build Coastguard Worker+#ifndef X86_H 1191*86ee64e7SAndroid Build Coastguard Worker+#define X86_H 1192*86ee64e7SAndroid Build Coastguard Worker+ 1193*86ee64e7SAndroid Build Coastguard Worker+#include "zlib.h" 1194*86ee64e7SAndroid Build Coastguard Worker+ 1195*86ee64e7SAndroid Build Coastguard Worker+extern int x86_cpu_enable_simd; 1196*86ee64e7SAndroid Build Coastguard Worker+ 1197*86ee64e7SAndroid Build Coastguard Worker+void x86_check_features(void); 1198*86ee64e7SAndroid Build Coastguard Worker+ 1199*86ee64e7SAndroid Build Coastguard Worker+#endif /* X86_H */ 1200*86ee64e7SAndroid Build Coastguard Workerdiff --git a/zutil.h b/zutil.h 1201*86ee64e7SAndroid Build Coastguard Workerindex 80375b8b6109..4425bcf75eb3 100644 1202*86ee64e7SAndroid Build Coastguard Worker--- a/zutil.h 1203*86ee64e7SAndroid Build Coastguard Worker+++ b/zutil.h 1204*86ee64e7SAndroid Build Coastguard Worker@@ -283,4 +283,10 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ 1205*86ee64e7SAndroid Build Coastguard Worker #define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ 1206*86ee64e7SAndroid Build Coastguard Worker (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) 1207*86ee64e7SAndroid Build Coastguard Worker 1208*86ee64e7SAndroid Build Coastguard Worker+#ifdef _MSC_VER 1209*86ee64e7SAndroid Build Coastguard Worker+#define zalign(x) __declspec(align(x)) 1210*86ee64e7SAndroid Build Coastguard Worker+#else 1211*86ee64e7SAndroid Build Coastguard Worker+#define zalign(x) __attribute__((aligned((x)))) 1212*86ee64e7SAndroid Build Coastguard Worker+#endif 1213*86ee64e7SAndroid Build Coastguard Worker+ 1214*86ee64e7SAndroid Build Coastguard Worker #endif /* ZUTIL_H */ 1215