xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/cpu_function_runtime.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_CPU_FUNCTION_RUNTIME_H_
17 #define TENSORFLOW_COMPILER_XLA_CPU_FUNCTION_RUNTIME_H_
18 
19 #include <stdint.h>
20 
21 #include <cassert>
22 #include <cstdlib>
23 #include <utility>
24 
namespace xla {
namespace cpu_function_runtime {
// Stores information about one buffer used by an XLA:CPU compiled function.
// These buffers are used for holding inputs to the computation, outputs from
// the computation and as temporary scratch space.
//
// A BufferInfo is exactly 16 bytes: the buffer kind (2 bits) and the buffer
// size (62 bits) share one 64-bit word, followed by a 64-bit entry parameter
// number.  Sizes therefore have to fit in 62 bits; larger values are truncated
// by the bit-field.
class BufferInfo {
 public:
  // Creates a BufferInfo from a serialized encoding generated by `Encode`.
  // `encoding.first` holds the packed kind and size, `encoding.second` holds
  // the entry parameter number.
  explicit BufferInfo(std::pair<uint64_t, uint64_t> encoding)
      : entry_param_number_(static_cast<int64_t>(encoding.second)) {
    Kind kind;
    uint64_t size;
    Unpack(encoding.first, &kind, &size);
    kind_ = kind;
    size_ = size;
  }

  // Returns true if this buffer stores a constant.  These never need to be
  // allocated by the runtime.
  bool is_constant() const { return kind() == Kind::kConstant; }

  // Returns true if this buffer stores an entry parameter.  These may or may
  // not need to be allocated by the runtime, depending on
  // XlaCompiledCpuFunction::AllocMode.
  bool is_entry_parameter() const { return kind() == Kind::kEntryParameter; }

  // Returns the entry parameter number of this buffer.  Must only be called
  // on entry parameter buffers (checked with an assert).
  uint64_t entry_parameter_number() const {
    assert(is_entry_parameter());
    return static_cast<uint64_t>(entry_param_number_);
  }

  // Returns true if this buffer is temporary scratch space required by the XLA
  // computations.  These are always allocated by the runtime.
  bool is_temp_buffer() const { return kind() == Kind::kTempBuffer; }

  // Returns true if this buffer is allocated on the C stack or into registers.
  // These buffers are never allocated by the runtime.
  bool is_on_stack_buffer() const { return kind() == Kind::kOnStackBuffer; }

  // Returns the size for this buffer.
  uint64_t size() const { return size_; }

  // Encodes this BufferInfo into two 64 bit integers that can be used to
  // reconstruct the BufferInfo later using the constructor.  We need this
  // because we use BufferInfo in places where using protocol buffers would
  // negatively impact binary size.
  //
  // The first integer carries the kind in its low 2 bits and the size in the
  // remaining 62 bits; the second integer is the entry parameter number (all
  // bits set for buffers that are not entry parameters).
  std::pair<uint64_t, uint64_t> Encode() const {
    static_assert(sizeof(*this) == 16, "");
    uint64_t upper = Pack(kind(), size_);
    uint64_t lower = static_cast<uint64_t>(entry_param_number_);
    return {upper, lower};
  }

  // Two BufferInfos are equal when they have the same kind and size and, for
  // entry parameters only, the same entry parameter number.
  bool operator==(const BufferInfo& buffer_info) const {
    if (kind() != buffer_info.kind() || size() != buffer_info.size()) {
      return false;
    }
    return !is_entry_parameter() ||
           entry_parameter_number() == buffer_info.entry_parameter_number();
  }

  // Factory methods:

  static BufferInfo MakeTempBuffer(uint64_t size) {
    return BufferInfo(Kind::kTempBuffer, /*size=*/size,
                      /*entry_param_number=*/kNoEntryParamNumber);
  }
  static BufferInfo MakeConstant(uint64_t size) {
    return BufferInfo(Kind::kConstant, /*size=*/size,
                      /*entry_param_number=*/kNoEntryParamNumber);
  }
  static BufferInfo MakeEntryParameter(uint64_t size, uint64_t param_number) {
    return BufferInfo(Kind::kEntryParameter, /*size=*/size,
                      /*entry_param_number=*/
                      static_cast<int64_t>(param_number));
  }
  static BufferInfo MakeOnStackBuffer(uint64_t size) {
    return BufferInfo(Kind::kOnStackBuffer, /*size=*/size,
                      /*entry_param_number=*/kNoEntryParamNumber);
  }

 private:
  BufferInfo() = default;

  // The enumerator values are part of the serialized encoding produced by
  // `Encode`; they must all fit in kKindBits bits.
  enum class Kind : uint64_t {
    kConstant,
    kTempBuffer,
    kEntryParameter,
    kOnStackBuffer
  };

  // Number of low bits of the packed word that hold the kind; the remaining
  // bits hold the size.
  static constexpr int kKindBits = 2;
  static constexpr int kSizeBits = 64 - kKindBits;
  static constexpr uint64_t kKindMask = (uint64_t{1} << kKindBits) - 1;

  // Entry parameter number stored for buffers that are not entry parameters.
  static constexpr int64_t kNoEntryParamNumber = -1;

  Kind kind() const { return static_cast<Kind>(kind_); }

  explicit BufferInfo(Kind kind, uint64_t size, int64_t entry_param_number)
      : kind_(kind), size_(size), entry_param_number_(entry_param_number) {}

  // Packs `kind` into the low kKindBits bits and `size` into the bits above.
  static uint64_t Pack(Kind kind, uint64_t size) {
    return (size << kKindBits) | static_cast<uint64_t>(kind);
  }

  // Inverse of `Pack`.
  static void Unpack(uint64_t packed, Kind* kind, uint64_t* size) {
    *size = packed >> kKindBits;
    *kind = static_cast<Kind>(packed & kKindMask);
  }

  Kind kind_ : kKindBits;
  uint64_t size_ : kSizeBits;
  int64_t entry_param_number_;
};

// Align to 64-bytes, to mimic tensorflow::Allocator::kAllocatorAlignment.
inline constexpr size_t Align() { return 64; }

// The minimum alignment of buffers passed to XLA:CPU.
inline constexpr size_t MinAlign() { return 16; }

// When declaring variables that will be passed to an XLA instance as input via
// set_arg_data(), be it a regular input or a resource variable in the graph,
// the C++ variables must be aligned.
//
// Example usage:
//   XLA_ALIGN std::array<float, 4> arg_x;
//   XLA_ALIGN float arg_y;
//   xla_instance.set_arg_data(0, arg_x.data());
//   xla_instance.set_arg_data(1, &arg_y);
#define XLA_ALIGN alignas(xla::cpu_function_runtime::Align())

// AlignedBufferBytes returns the sum of the size of each buffer in
// `buffer_infos`, skipping constants, on-stack buffers and, if
// allocate_entry_params is false, entry parameters.  There are `n` entries in
// `buffer_infos`.  Each buffer is aligned to Align() byte boundaries.
size_t AlignedBufferBytes(const BufferInfo* buffer_infos, size_t n,
                          bool allocate_entry_params);

// MallocContiguousBuffers allocates buffers for use by the entry point
// generated by tfcompile.  There are `n` entries in `buffer_infos`.  If
// `annotate_initialized` is set, the allocated memory will be annotated as
// having been initialized - this is useful when allocating temporary buffers.
// If allocate_entry_params is true then it allocates temp buffers and entry
// parameters, otherwise it allocates only temp buffers.  Slots in `bufs`
// corresponding to unallocated buffers are set to nullptr.
//
// A single contiguous block of memory is allocated, and portions of it are
// parceled out into `bufs`, which must have space for `n` entries.  Returns
// the head of the allocated contiguous block, which should be passed to
// FreeContiguous when the buffers are no longer in use.
void* MallocContiguousBuffers(const BufferInfo* buffer_infos, size_t n,
                              bool allocate_entry_params, void** bufs,
                              bool annotate_initialized);

// FreeContiguous frees the contiguous block of memory allocated by
// MallocContiguousBuffers.
void FreeContiguous(void* contiguous);
}  // namespace cpu_function_runtime
}  // namespace xla
180 
181 #endif  // TENSORFLOW_COMPILER_XLA_CPU_FUNCTION_RUNTIME_H_
182