1 /* compare256_neon.c - NEON version of compare256
2  * Copyright (C) 2022 Nathan Moinvaziri
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
7 #ifdef _M_ARM64
8 #  include <arm64_neon.h>
9 #else
10 #  include <arm_neon.h>
11 #endif
12 #include "../../zbuild.h"
13 
/* Return the number of leading bytes that are equal in src0 and src1,
 * capped at 256.
 *
 * The buffers are read in 16-byte chunks with vld1q_u8; reading stops at the
 * first chunk that contains a difference, so at most 256 bytes are read from
 * each buffer.
 *
 * src0, src1: the two byte sequences to compare.
 * Returns the offset of the first differing byte, or 256 when no difference
 * is found.
 */
static inline uint32_t compare256_neon_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t matched;

    for (matched = 0; matched < 256; matched += 16) {
        /* Equal bytes XOR to zero, so any set bit marks a mismatching byte.
         * The 16-byte result is viewed as two 64-bit lanes. */
        const uint64x2_t diff =
            vreinterpretq_u64_u8(veorq_u8(vld1q_u8(src0 + matched), vld1q_u8(src1 + matched)));
        const uint64_t first_half = vgetq_lane_u64(diff, 0);
        const uint64_t second_half = vgetq_lane_u64(diff, 1);

        /* Index of the lowest set bit, divided by 8, is used as the byte
         * offset of the first difference inside the lane.
         * NOTE(review): this treats the least significant byte of a lane as
         * the earliest byte in memory (little-endian layout) - confirm for
         * any big-endian target. */
        if (first_half != 0) {
            return matched + (uint32_t)__builtin_ctzll(first_half) / 8;
        }
        if (second_half != 0) {
            return matched + 8 + (uint32_t)__builtin_ctzll(second_half) / 8;
        }
    }

    return 256;
}
44 
compare256_neon(const uint8_t * src0,const uint8_t * src1)45 Z_INTERNAL uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1) {
46     return compare256_neon_static(src0, src1);
47 }
48 
/* First inclusion of the match template: LONGEST_MATCH is set to
 * longest_match_neon and COMPARE256 to compare256_neon_static before
 * match_tpl.h is included.
 * NOTE(review): presumably match_tpl.h defines a function named by
 * LONGEST_MATCH that calls COMPARE256 - confirm against match_tpl.h. */
#define LONGEST_MATCH       longest_match_neon
#define COMPARE256          compare256_neon_static

#include "match_tpl.h"

/* Second inclusion of the same template, this time with LONGEST_MATCH_SLOW
 * defined and LONGEST_MATCH set to longest_match_slow_neon.
 * NOTE(review): LONGEST_MATCH and COMPARE256 are defined again here with no
 * visible #undef; this assumes match_tpl.h undefines them before it ends -
 * confirm, since redefining LONGEST_MATCH with a different value would
 * otherwise be diagnosed. */
#define LONGEST_MATCH_SLOW
#define LONGEST_MATCH       longest_match_slow_neon
#define COMPARE256          compare256_neon_static

#include "match_tpl.h"
59 
60 #endif
61