1*9356374aSAndroid Build Coastguard Worker // Copyright 2023 The Abseil Authors
2*9356374aSAndroid Build Coastguard Worker //
3*9356374aSAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*9356374aSAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*9356374aSAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*9356374aSAndroid Build Coastguard Worker //
7*9356374aSAndroid Build Coastguard Worker // https://www.apache.org/licenses/LICENSE-2.0
8*9356374aSAndroid Build Coastguard Worker //
9*9356374aSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*9356374aSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*9356374aSAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*9356374aSAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*9356374aSAndroid Build Coastguard Worker // limitations under the License.
14*9356374aSAndroid Build Coastguard Worker //
15*9356374aSAndroid Build Coastguard Worker // -----------------------------------------------------------------------------
16*9356374aSAndroid Build Coastguard Worker // File: prefetch.h
17*9356374aSAndroid Build Coastguard Worker // -----------------------------------------------------------------------------
18*9356374aSAndroid Build Coastguard Worker //
19*9356374aSAndroid Build Coastguard Worker // This header file defines prefetch functions to prefetch memory contents
20*9356374aSAndroid Build Coastguard Worker // into the first level cache (L1) for the current CPU. The prefetch logic
21*9356374aSAndroid Build Coastguard Worker // offered in this header is limited to prefetching first level cachelines
22*9356374aSAndroid Build Coastguard Worker // only, and is aimed at relatively 'simple' prefetching logic.
23*9356374aSAndroid Build Coastguard Worker //
24*9356374aSAndroid Build Coastguard Worker #ifndef ABSL_BASE_PREFETCH_H_
25*9356374aSAndroid Build Coastguard Worker #define ABSL_BASE_PREFETCH_H_
26*9356374aSAndroid Build Coastguard Worker
27*9356374aSAndroid Build Coastguard Worker #include "absl/base/attributes.h"
28*9356374aSAndroid Build Coastguard Worker #include "absl/base/config.h"
29*9356374aSAndroid Build Coastguard Worker
30*9356374aSAndroid Build Coastguard Worker #if defined(ABSL_INTERNAL_HAVE_SSE)
31*9356374aSAndroid Build Coastguard Worker #include <xmmintrin.h>
32*9356374aSAndroid Build Coastguard Worker #endif
33*9356374aSAndroid Build Coastguard Worker
34*9356374aSAndroid Build Coastguard Worker #if defined(_MSC_VER)
35*9356374aSAndroid Build Coastguard Worker #include <intrin.h>
36*9356374aSAndroid Build Coastguard Worker #if defined(ABSL_INTERNAL_HAVE_SSE)
37*9356374aSAndroid Build Coastguard Worker #pragma intrinsic(_mm_prefetch)
38*9356374aSAndroid Build Coastguard Worker #endif
39*9356374aSAndroid Build Coastguard Worker #endif
40*9356374aSAndroid Build Coastguard Worker
41*9356374aSAndroid Build Coastguard Worker namespace absl {
42*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_BEGIN
43*9356374aSAndroid Build Coastguard Worker
44*9356374aSAndroid Build Coastguard Worker // Moves data into the L1 cache before it is read, or "prefetches" it.
45*9356374aSAndroid Build Coastguard Worker //
46*9356374aSAndroid Build Coastguard Worker // The value of `addr` is the address of the memory to prefetch. If
47*9356374aSAndroid Build Coastguard Worker // the target and compiler support it, data prefetch instructions are
48*9356374aSAndroid Build Coastguard Worker // generated. If the prefetch is done some time before the memory is
49*9356374aSAndroid Build Coastguard Worker // read, it may be in the cache by the time the read occurs.
50*9356374aSAndroid Build Coastguard Worker //
51*9356374aSAndroid Build Coastguard Worker // This method prefetches data with the highest degree of temporal locality;
52*9356374aSAndroid Build Coastguard Worker // data is prefetched where possible into all levels of the cache.
53*9356374aSAndroid Build Coastguard Worker //
54*9356374aSAndroid Build Coastguard Worker // Incorrect or gratuitous use of this function can degrade performance.
55*9356374aSAndroid Build Coastguard Worker // Use this function only when representative benchmarks show an improvement.
56*9356374aSAndroid Build Coastguard Worker //
57*9356374aSAndroid Build Coastguard Worker // Example:
58*9356374aSAndroid Build Coastguard Worker //
59*9356374aSAndroid Build Coastguard Worker // // Computes incremental checksum for `data`.
60*9356374aSAndroid Build Coastguard Worker // int ComputeChecksum(int sum, absl::string_view data);
61*9356374aSAndroid Build Coastguard Worker //
62*9356374aSAndroid Build Coastguard Worker // // Computes cumulative checksum for all values in `data`
63*9356374aSAndroid Build Coastguard Worker // int ComputeChecksum(absl::Span<const std::string> data) {
64*9356374aSAndroid Build Coastguard Worker // int sum = 0;
65*9356374aSAndroid Build Coastguard Worker // auto it = data.begin();
66*9356374aSAndroid Build Coastguard Worker // auto pit = data.begin();
67*9356374aSAndroid Build Coastguard Worker // auto end = data.end();
68*9356374aSAndroid Build Coastguard Worker // for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) {
69*9356374aSAndroid Build Coastguard Worker // absl::PrefetchToLocalCache(pit->data());
70*9356374aSAndroid Build Coastguard Worker // }
71*9356374aSAndroid Build Coastguard Worker // for (; pit != end; ++pit, ++it) {
72*9356374aSAndroid Build Coastguard Worker // sum = ComputeChecksum(sum, *it);
73*9356374aSAndroid Build Coastguard Worker // absl::PrefetchToLocalCache(pit->data());
74*9356374aSAndroid Build Coastguard Worker // }
75*9356374aSAndroid Build Coastguard Worker // for (; it != end; ++it) {
76*9356374aSAndroid Build Coastguard Worker // sum = ComputeChecksum(sum, *it);
77*9356374aSAndroid Build Coastguard Worker // }
78*9356374aSAndroid Build Coastguard Worker // return sum;
79*9356374aSAndroid Build Coastguard Worker // }
80*9356374aSAndroid Build Coastguard Worker //
81*9356374aSAndroid Build Coastguard Worker void PrefetchToLocalCache(const void* addr);
82*9356374aSAndroid Build Coastguard Worker
83*9356374aSAndroid Build Coastguard Worker // Moves data into the L1 cache before it is read, or "prefetches" it.
84*9356374aSAndroid Build Coastguard Worker //
85*9356374aSAndroid Build Coastguard Worker // This function is identical to `PrefetchToLocalCache()` except that it has
86*9356374aSAndroid Build Coastguard Worker // non-temporal locality: the fetched data should not be left in any of the
87*9356374aSAndroid Build Coastguard Worker // cache tiers. This is useful for cases where the data is used only once /
88*9356374aSAndroid Build Coastguard Worker // short term, for example, invoking a destructor on an object.
89*9356374aSAndroid Build Coastguard Worker //
90*9356374aSAndroid Build Coastguard Worker // Incorrect or gratuitous use of this function can degrade performance.
91*9356374aSAndroid Build Coastguard Worker // Use this function only when representative benchmarks show an improvement.
92*9356374aSAndroid Build Coastguard Worker //
93*9356374aSAndroid Build Coastguard Worker // Example:
94*9356374aSAndroid Build Coastguard Worker //
95*9356374aSAndroid Build Coastguard Worker // template <typename Iterator>
96*9356374aSAndroid Build Coastguard Worker // void DestroyPointers(Iterator begin, Iterator end) {
// int dist = 8;
// auto prefetch_it = begin;
// while (prefetch_it != end && --dist) {
102*9356374aSAndroid Build Coastguard Worker // absl::PrefetchToLocalCacheNta(*prefetch_it++);
103*9356374aSAndroid Build Coastguard Worker // }
104*9356374aSAndroid Build Coastguard Worker // while (prefetch_it != end) {
105*9356374aSAndroid Build Coastguard Worker // delete *begin++;
106*9356374aSAndroid Build Coastguard Worker // absl::PrefetchToLocalCacheNta(*prefetch_it++);
107*9356374aSAndroid Build Coastguard Worker // }
108*9356374aSAndroid Build Coastguard Worker // while (begin != end) {
109*9356374aSAndroid Build Coastguard Worker // delete *begin++;
110*9356374aSAndroid Build Coastguard Worker // }
111*9356374aSAndroid Build Coastguard Worker // }
112*9356374aSAndroid Build Coastguard Worker //
113*9356374aSAndroid Build Coastguard Worker void PrefetchToLocalCacheNta(const void* addr);
114*9356374aSAndroid Build Coastguard Worker
115*9356374aSAndroid Build Coastguard Worker // Moves data into the L1 cache with the intent to modify it.
116*9356374aSAndroid Build Coastguard Worker //
// This function is similar to `PrefetchToLocalCache()` except that it
// prefetches cachelines with an 'intent to modify'. This typically includes
// invalidating cache entries for this address in all other cache tiers, and an
// exclusive access intent.
121*9356374aSAndroid Build Coastguard Worker //
122*9356374aSAndroid Build Coastguard Worker // Incorrect or gratuitous use of this function can degrade performance. As this
123*9356374aSAndroid Build Coastguard Worker // function can invalidate cached cachelines on other caches and computer cores,
124*9356374aSAndroid Build Coastguard Worker // incorrect usage of this function can have an even greater negative impact
125*9356374aSAndroid Build Coastguard Worker // than incorrect regular prefetches.
126*9356374aSAndroid Build Coastguard Worker // Use this function only when representative benchmarks show an improvement.
127*9356374aSAndroid Build Coastguard Worker //
128*9356374aSAndroid Build Coastguard Worker // Example:
129*9356374aSAndroid Build Coastguard Worker //
130*9356374aSAndroid Build Coastguard Worker // void* Arena::Allocate(size_t size) {
131*9356374aSAndroid Build Coastguard Worker // void* ptr = AllocateBlock(size);
132*9356374aSAndroid Build Coastguard Worker // absl::PrefetchToLocalCacheForWrite(ptr);
133*9356374aSAndroid Build Coastguard Worker // return ptr;
134*9356374aSAndroid Build Coastguard Worker // }
135*9356374aSAndroid Build Coastguard Worker //
136*9356374aSAndroid Build Coastguard Worker void PrefetchToLocalCacheForWrite(const void* addr);
137*9356374aSAndroid Build Coastguard Worker
138*9356374aSAndroid Build Coastguard Worker #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
139*9356374aSAndroid Build Coastguard Worker
140*9356374aSAndroid Build Coastguard Worker #define ABSL_HAVE_PREFETCH 1
141*9356374aSAndroid Build Coastguard Worker
142*9356374aSAndroid Build Coastguard Worker // See __builtin_prefetch:
143*9356374aSAndroid Build Coastguard Worker // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
144*9356374aSAndroid Build Coastguard Worker //
PrefetchToLocalCache(const void * addr)145*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
146*9356374aSAndroid Build Coastguard Worker const void* addr) {
147*9356374aSAndroid Build Coastguard Worker __builtin_prefetch(addr, 0, 3);
148*9356374aSAndroid Build Coastguard Worker }
149*9356374aSAndroid Build Coastguard Worker
PrefetchToLocalCacheNta(const void * addr)150*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
151*9356374aSAndroid Build Coastguard Worker const void* addr) {
152*9356374aSAndroid Build Coastguard Worker __builtin_prefetch(addr, 0, 0);
153*9356374aSAndroid Build Coastguard Worker }
154*9356374aSAndroid Build Coastguard Worker
PrefetchToLocalCacheForWrite(const void * addr)155*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
156*9356374aSAndroid Build Coastguard Worker const void* addr) {
157*9356374aSAndroid Build Coastguard Worker // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
158*9356374aSAndroid Build Coastguard Worker // unless -march=broadwell or newer; this is not generally the default, so we
159*9356374aSAndroid Build Coastguard Worker // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
160*9356374aSAndroid Build Coastguard Worker // processors and has been present on AMD processors since the K6-2.
161*9356374aSAndroid Build Coastguard Worker #if defined(__x86_64__) && !defined(__PRFCHW__)
162*9356374aSAndroid Build Coastguard Worker asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
163*9356374aSAndroid Build Coastguard Worker #else
164*9356374aSAndroid Build Coastguard Worker __builtin_prefetch(addr, 1, 3);
165*9356374aSAndroid Build Coastguard Worker #endif
166*9356374aSAndroid Build Coastguard Worker }
167*9356374aSAndroid Build Coastguard Worker
168*9356374aSAndroid Build Coastguard Worker #elif defined(ABSL_INTERNAL_HAVE_SSE)
169*9356374aSAndroid Build Coastguard Worker
170*9356374aSAndroid Build Coastguard Worker #define ABSL_HAVE_PREFETCH 1
171*9356374aSAndroid Build Coastguard Worker
PrefetchToLocalCache(const void * addr)172*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
173*9356374aSAndroid Build Coastguard Worker const void* addr) {
174*9356374aSAndroid Build Coastguard Worker _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
175*9356374aSAndroid Build Coastguard Worker }
176*9356374aSAndroid Build Coastguard Worker
PrefetchToLocalCacheNta(const void * addr)177*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
178*9356374aSAndroid Build Coastguard Worker const void* addr) {
179*9356374aSAndroid Build Coastguard Worker _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
180*9356374aSAndroid Build Coastguard Worker }
181*9356374aSAndroid Build Coastguard Worker
PrefetchToLocalCacheForWrite(const void * addr)182*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
183*9356374aSAndroid Build Coastguard Worker const void* addr) {
184*9356374aSAndroid Build Coastguard Worker #if defined(_MM_HINT_ET0)
185*9356374aSAndroid Build Coastguard Worker _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
186*9356374aSAndroid Build Coastguard Worker #elif !defined(_MSC_VER) && defined(__x86_64__)
187*9356374aSAndroid Build Coastguard Worker // _MM_HINT_ET0 is not universally supported. As we commented further
188*9356374aSAndroid Build Coastguard Worker // up, PREFETCHW is recognized as a no-op on older Intel processors
189*9356374aSAndroid Build Coastguard Worker // and has been present on AMD processors since the K6-2. We have this
190*9356374aSAndroid Build Coastguard Worker // disabled for MSVC compilers as this miscompiles on older MSVC compilers.
191*9356374aSAndroid Build Coastguard Worker asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
192*9356374aSAndroid Build Coastguard Worker #endif
193*9356374aSAndroid Build Coastguard Worker }
194*9356374aSAndroid Build Coastguard Worker
195*9356374aSAndroid Build Coastguard Worker #else
196*9356374aSAndroid Build Coastguard Worker
PrefetchToLocalCache(const void * addr)197*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
198*9356374aSAndroid Build Coastguard Worker const void* addr) {}
PrefetchToLocalCacheNta(const void * addr)199*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
200*9356374aSAndroid Build Coastguard Worker const void* addr) {}
PrefetchToLocalCacheForWrite(const void * addr)201*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
202*9356374aSAndroid Build Coastguard Worker const void* addr) {}
203*9356374aSAndroid Build Coastguard Worker
204*9356374aSAndroid Build Coastguard Worker #endif
205*9356374aSAndroid Build Coastguard Worker
206*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_END
207*9356374aSAndroid Build Coastguard Worker } // namespace absl
208*9356374aSAndroid Build Coastguard Worker
209*9356374aSAndroid Build Coastguard Worker #endif // ABSL_BASE_PREFETCH_H_
210