xref: /aosp_15_r20/external/angle/third_party/abseil-cpp/absl/base/prefetch.h (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 // Copyright 2023 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // -----------------------------------------------------------------------------
16 // File: prefetch.h
17 // -----------------------------------------------------------------------------
18 //
19 // This header file defines prefetch functions to prefetch memory contents
20 // into the first level cache (L1) for the current CPU. The prefetch logic
21 // offered in this header is limited to prefetching first level cachelines
22 // only, and is aimed at relatively 'simple' prefetching logic.
23 //
24 #ifndef ABSL_BASE_PREFETCH_H_
25 #define ABSL_BASE_PREFETCH_H_
26 
27 #include "absl/base/attributes.h"
28 #include "absl/base/config.h"
29 
30 #if defined(ABSL_INTERNAL_HAVE_SSE)
31 #include <xmmintrin.h>
32 #endif
33 
34 #if defined(_MSC_VER)
35 #include <intrin.h>
36 #if defined(ABSL_INTERNAL_HAVE_SSE)
37 #pragma intrinsic(_mm_prefetch)
38 #endif
39 #endif
40 
41 namespace absl {
42 ABSL_NAMESPACE_BEGIN
43 
44 // Moves data into the L1 cache before it is read, or "prefetches" it.
45 //
46 // The value of `addr` is the address of the memory to prefetch. If
47 // the target and compiler support it, data prefetch instructions are
48 // generated. If the prefetch is done some time before the memory is
49 // read, it may be in the cache by the time the read occurs.
50 //
51 // This method prefetches data with the highest degree of temporal locality;
52 // data is prefetched where possible into all levels of the cache.
53 //
54 // Incorrect or gratuitous use of this function can degrade performance.
55 // Use this function only when representative benchmarks show an improvement.
56 //
57 // Example:
58 //
59 //  // Computes incremental checksum for `data`.
60 //  int ComputeChecksum(int sum, absl::string_view data);
61 //
62 //  // Computes cumulative checksum for all values in `data`
63 //  int ComputeChecksum(absl::Span<const std::string> data) {
64 //    int sum = 0;
65 //    auto it = data.begin();
66 //    auto pit = data.begin();
67 //    auto end = data.end();
68 //    for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) {
69 //      absl::PrefetchToLocalCache(pit->data());
70 //    }
71 //    for (; pit != end; ++pit, ++it) {
72 //      sum = ComputeChecksum(sum, *it);
73 //      absl::PrefetchToLocalCache(pit->data());
74 //    }
75 //    for (; it != end; ++it) {
76 //      sum = ComputeChecksum(sum, *it);
77 //    }
78 //    return sum;
79 //  }
80 //
81 void PrefetchToLocalCache(const void* addr);
82 
83 // Moves data into the L1 cache before it is read, or "prefetches" it.
84 //
85 // This function is identical to `PrefetchToLocalCache()` except that it has
86 // non-temporal locality: the fetched data should not be left in any of the
87 // cache tiers. This is useful for cases where the data is used only once /
88 // short term, for example, invoking a destructor on an object.
89 //
90 // Incorrect or gratuitous use of this function can degrade performance.
91 // Use this function only when representative benchmarks show an improvement.
92 //
93 // Example:
94 //
95 //  template <typename Iterator>
96 //  void DestroyPointers(Iterator begin, Iterator end) {
//    // Prime the pipeline: prefetch the first few pointees before any
//    // `delete` runs.
99 //    int dist = 8;
100 //    auto prefetch_it = begin;
//    while (prefetch_it != end && --dist) {
102 //      absl::PrefetchToLocalCacheNta(*prefetch_it++);
103 //    }
104 //    while (prefetch_it != end) {
105 //      delete *begin++;
106 //      absl::PrefetchToLocalCacheNta(*prefetch_it++);
107 //    }
108 //    while (begin != end) {
109 //      delete *begin++;
110 //    }
111 //  }
112 //
113 void PrefetchToLocalCacheNta(const void* addr);
114 
115 // Moves data into the L1 cache with the intent to modify it.
116 //
117 // This function is similar to `PrefetchToLocalCache()` except that it
// prefetches cachelines with an 'intent to modify'. This typically includes
119 // invalidating cache entries for this address in all other cache tiers, and an
120 // exclusive access intent.
121 //
122 // Incorrect or gratuitous use of this function can degrade performance. As this
123 // function can invalidate cached cachelines on other caches and computer cores,
124 // incorrect usage of this function can have an even greater negative impact
125 // than incorrect regular prefetches.
126 // Use this function only when representative benchmarks show an improvement.
127 //
128 // Example:
129 //
130 //  void* Arena::Allocate(size_t size) {
131 //    void* ptr = AllocateBlock(size);
132 //    absl::PrefetchToLocalCacheForWrite(ptr);
133 //    return ptr;
134 //  }
135 //
136 void PrefetchToLocalCacheForWrite(const void* addr);
137 
138 #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
139 
140 #define ABSL_HAVE_PREFETCH 1
141 
142 // See __builtin_prefetch:
143 // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
144 //
PrefetchToLocalCache(const void * addr)145 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
146     const void* addr) {
147   __builtin_prefetch(addr, 0, 3);
148 }
149 
PrefetchToLocalCacheNta(const void * addr)150 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
151     const void* addr) {
152   __builtin_prefetch(addr, 0, 0);
153 }
154 
PrefetchToLocalCacheForWrite(const void * addr)155 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
156     const void* addr) {
157   // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
158   // unless -march=broadwell or newer; this is not generally the default, so we
159   // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
160   // processors and has been present on AMD processors since the K6-2.
161 #if defined(__x86_64__) && !defined(__PRFCHW__)
162   asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
163 #else
164   __builtin_prefetch(addr, 1, 3);
165 #endif
166 }
167 
168 #elif defined(ABSL_INTERNAL_HAVE_SSE)
169 
170 #define ABSL_HAVE_PREFETCH 1
171 
PrefetchToLocalCache(const void * addr)172 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
173     const void* addr) {
174   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
175 }
176 
PrefetchToLocalCacheNta(const void * addr)177 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
178     const void* addr) {
179   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
180 }
181 
PrefetchToLocalCacheForWrite(const void * addr)182 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
183     const void* addr) {
184 #if defined(_MM_HINT_ET0)
185   _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
186 #elif !defined(_MSC_VER) && defined(__x86_64__)
187   // _MM_HINT_ET0 is not universally supported. As we commented further
188   // up, PREFETCHW is recognized as a no-op on older Intel processors
189   // and has been present on AMD processors since the K6-2. We have this
190   // disabled for MSVC compilers as this miscompiles on older MSVC compilers.
191   asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
192 #endif
193 }
194 
195 #else
196 
PrefetchToLocalCache(const void * addr)197 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
198     const void* addr) {}
PrefetchToLocalCacheNta(const void * addr)199 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
200     const void* addr) {}
PrefetchToLocalCacheForWrite(const void * addr)201 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
202     const void* addr) {}
203 
204 #endif
205 
206 ABSL_NAMESPACE_END
207 }  // namespace absl
208 
209 #endif  // ABSL_BASE_PREFETCH_H_
210