1 /* compare256_neon.c - NEON version of compare256
2 * Copyright (C) 2022 Nathan Moinvaziri
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6 #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
7 #ifdef _M_ARM64
8 # include <arm64_neon.h>
9 #else
10 # include <arm_neon.h>
11 #endif
12 #include "../../zbuild.h"
13
/* Return the number of leading bytes that are equal in src0 and src1,
 * capped at 256.
 *
 * The inputs are read in 16-byte chunks; up to 256 bytes are loaded from each
 * pointer, so callers must make that many bytes readable.
 *
 * NOTE(review): mapping the trailing-zero count to a byte index assumes the
 * lowest-order byte of each 64-bit lane is the lowest-addressed byte
 * (little-endian lane layout) -- confirm for any big-endian target.
 */
static inline uint32_t compare256_neon_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t matched;

    for (matched = 0; matched < 256; matched += 16) {
        /* XOR leaves a zero byte wherever the two inputs agree. */
        const uint8x16_t diff = veorq_u8(vld1q_u8(src0 + matched), vld1q_u8(src1 + matched));
        const uint64x2_t halves = vreinterpretq_u64_u8(diff);
        const uint64_t lo = vgetq_lane_u64(halves, 0);
        const uint64_t hi = vgetq_lane_u64(halves, 1);

        /* First 8 bytes of the chunk: trailing zero bits / 8 is the index of
         * the first non-zero (differing) byte. */
        if (lo != 0) {
            return matched + ((uint32_t)__builtin_ctzll(lo) >> 3);
        }

        /* Second 8 bytes of the chunk. */
        if (hi != 0) {
            return matched + 8 + ((uint32_t)__builtin_ctzll(hi) >> 3);
        }
    }

    /* All 256 bytes were identical. */
    return 256;
}
44
compare256_neon(const uint8_t * src0,const uint8_t * src1)45 Z_INTERNAL uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1) {
46 return compare256_neon_static(src0, src1);
47 }
48
/* First inclusion of the match template: LONGEST_MATCH is set to
 * longest_match_neon and COMPARE256 to the static NEON comparison helper.
 * NOTE(review): match_tpl.h is not visible from this file; presumably it
 * defines a function named by LONGEST_MATCH that calls COMPARE256, and
 * #undefs both macros afterwards (they are redefined below without an
 * #undef) -- confirm against the template. */
#define LONGEST_MATCH longest_match_neon
#define COMPARE256 compare256_neon_static

#include "match_tpl.h"

/* Second inclusion of the same template, this time with LONGEST_MATCH_SLOW
 * defined and LONGEST_MATCH set to longest_match_slow_neon.
 * NOTE(review): presumably LONGEST_MATCH_SLOW selects the slow-variant code
 * path inside match_tpl.h -- confirm. */
#define LONGEST_MATCH_SLOW
#define LONGEST_MATCH longest_match_slow_neon
#define COMPARE256 compare256_neon_static

#include "match_tpl.h"
59
60 #endif
61