/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define EIGEN_USE_THREADS

#include "tensorflow/compiler/xla/service/hlo_runner.h"

#include <memory>
#include <string>
#include <utility>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/compiler/xla/layout_util.h"
#include "tensorflow/compiler/xla/service/executable.h"
#include "tensorflow/compiler/xla/service/hlo_module_group.h"
#include "tensorflow/compiler/xla/service/hlo_module_util.h"
#include "tensorflow/compiler/xla/service/hlo_parser.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"
#include "tensorflow/compiler/xla/shape.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/core/lib/core/blocking_counter.h"
#include "tensorflow/core/platform/logging.h"

namespace xla {

HloRunner::HloRunner(se::Platform* platform, int intra_op_parallelism_threads) {
  BackendOptions backend_options;
  backend_options.set_platform(platform);
  backend_options.set_intra_op_parallelism_threads(
      intra_op_parallelism_threads);
  backend_ = Backend::CreateBackend(backend_options).value();
  device_shape_representation_fn_ = [this](const Shape& shape) {
    return backend_->compiler()->DefaultDeviceShapeRepresentation(shape);
  };
  VLOG(1) << "Created HloRunner for platform: " << platform->Name();
}

HloRunner::~HloRunner() {}

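// Allocates a device buffer matching `literal`'s shape on the default device
// and copies the literal's contents into it.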
StatusOr<ScopedShapedBuffer> HloRunner::TransferLiteralToDevice(
    const Literal& literal) {
  TF_ASSIGN_OR_RETURN(
      ScopedShapedBuffer buffer,
      backend().transfer_manager()->AllocateScopedShapedBuffer(
          literal.shape(), backend().memory_allocator(),
          backend().default_device_ordinal(), device_shape_representation_fn_));
  TF_ASSIGN_OR_RETURN(
      auto stream, backend().BorrowStream(backend().default_stream_executor()));
  TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
      stream.get(), literal, buffer));
  return std::move(buffer);
}

StatusOr<std::vector<ScopedShapedBuffer>> HloRunner::TransferLiteralsToDevice(
    absl::Span<const Literal* const> literals) {
  std::vector<ScopedShapedBuffer> buffers;
  buffers.reserve(literals.size());
  for (const Literal* literal : literals) {
    CHECK(literal != nullptr);
    TF_ASSIGN_OR_RETURN(ScopedShapedBuffer buffer,
                        TransferLiteralToDevice(*literal));
    buffers.push_back(std::move(buffer));
  }
  return std::move(buffers);
}

StatusOr<std::vector<ScopedShapedBuffer>> HloRunner::TransferLiteralsToDevice(
    absl::Span<const Literal> literals) {
  std::vector<const Literal*> literal_pointers;
  literal_pointers.reserve(literals.size());
  for (const auto& literal : literals) {
    literal_pointers.push_back(&literal);
  }
  return TransferLiteralsToDevice(literal_pointers);
}

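// Copies the contents of a device buffer back into a host literal, using a
// stream borrowed from the default stream executor.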
StatusOr<Literal> HloRunner::TransferLiteralFromDevice(
    const ShapedBuffer& buffer) {
  TF_ASSIGN_OR_RETURN(
      auto stream, backend().BorrowStream(backend().default_stream_executor()));
  return backend().transfer_manager()->TransferLiteralFromDevice(stream.get(),
                                                                 buffer);
}

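// Compiles and runs `module` end to end: the argument literals are copied to
// the device, the module is executed, and the result is copied back to the
// host.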
StatusOr<Literal> HloRunner::Execute(std::unique_ptr<HloModule> module,
                                     absl::Span<const Literal* const> arguments,
                                     bool run_hlo_passes,
                                     ExecutionProfile* profile) {
  xla::UpdateEntryComputationLayout(module.get(),
                                    device_shape_representation_fn_);

  TF_ASSIGN_OR_RETURN(std::vector<ScopedShapedBuffer> argument_buffers,
                      TransferLiteralsToDevice(arguments));
  TF_ASSIGN_OR_RETURN(ExecutionOutput result,
                      ExecuteWithDeviceBuffers(
                          /*module=*/std::move(module),
                          /*arguments=*/argument_buffers,
                          /*run_hlo_passes=*/run_hlo_passes,
                          /*profile=*/profile));
  return TransferLiteralFromDevice(result.Result());
}

StatusOr<Literal> HloRunner::ExecuteWithExecutable(
    Executable* executable, absl::Span<const Literal* const> arguments,
    ExecutionProfile* profile) {
  TF_ASSIGN_OR_RETURN(std::vector<ScopedShapedBuffer> argument_buffers,
                      TransferLiteralsToDevice(arguments));
  TF_ASSIGN_OR_RETURN(ExecutionOutput result,
                      ExecuteWithDeviceBuffers(
                          /*executable=*/executable,
                          /*arguments=*/argument_buffers,
                          /*profile=*/profile));
  return TransferLiteralFromDevice(result.Result());
}

// Converts the owning buffers of `inputs` into a vector of ExecutionInputs.
// Buffers that the module's alias config marks as aliased are passed as owned
// memory; all other buffers are passed unowned.
static std::vector<ExecutionInput> ExecutionInputsFromScopedShapedBuffers(
    absl::Span<ScopedShapedBuffer const> inputs,
    HloInputOutputAliasConfig alias_config, int device_ordinal,
    se::DeviceMemoryAllocator* allocator) {
  std::vector<ExecutionInput> execution_inputs;

  for (int param_num = 0; param_num < inputs.size(); param_num++) {
    const ScopedShapedBuffer& input_buffer = inputs[param_num];
    ShapeTree<MaybeOwningDeviceMemory> buffer_tree(
        input_buffer.on_device_shape());

    input_buffer.buffers().ForEachElement(
        [&](const ShapeIndex& index,
            const se::DeviceMemoryBase& execution_input_buffer) {
          if (alias_config.ParameterHasAlias(param_num, index)) {
            // Store owned.
            *buffer_tree.mutable_element(index) = se::OwningDeviceMemory{
                execution_input_buffer, device_ordinal, allocator};
          } else {
            // Store unowned.
            *buffer_tree.mutable_element(index) = execution_input_buffer;
          }
        });
    execution_inputs.emplace_back(std::move(buffer_tree));
  }
  return execution_inputs;
}

StatusOr<ExecutionOutput> HloRunner::ExecuteWithDeviceBuffers(
    std::unique_ptr<HloModule> module,
    absl::Span<ScopedShapedBuffer const> arguments, bool run_hlo_passes,
    ExecutionProfile* profile) {
  TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable,
                      CreateExecutable(std::move(module), run_hlo_passes));
  return ExecuteWithDeviceBuffers(executable.get(), arguments, profile);
}

StatusOr<ExecutionOutput> HloRunner::ExecuteWithDeviceBuffers(
    Executable* executable, absl::Span<ScopedShapedBuffer const> arguments,
    ExecutionProfile* profile) {
  xla::UpdateEntryComputationLayout(&executable->module(),
                                    device_shape_representation_fn_);

  // Set up service run options, backed by a stream on the default device.
  se::Stream stream(backend().default_stream_executor());
  stream.Init();
  ServiceExecutableRunOptions service_run_options =
      GetServiceRunOptionsForDevice(backend().default_device_ordinal(), &stream,
                                    nullptr, RunId());
  service_run_options.mutable_run_options()->set_execution_profile(profile);

  std::vector<ExecutionInput> execution_arguments =
      ExecutionInputsFromScopedShapedBuffers(
          arguments, executable->module().input_output_alias_config(),
          stream.parent()->device_ordinal(), stream.parent()->GetAllocator());

  TF_ASSIGN_OR_RETURN(
      ExecutionOutput retval,
      executable->ExecuteOnStreamWrapper(&service_run_options,
                                         std::move(execution_arguments)));
  TF_RETURN_IF_ERROR(stream.BlockHostUntilDone());
  return std::move(retval);
}

StatusOr<std::vector<Literal>> HloRunner::ExecuteReplicated(
    std::unique_ptr<HloModule> module, const ReplicatedExecuteOptions& options,
    DeviceAssignment* device_assignment) {
  TF_ASSIGN_OR_RETURN(
      std::unique_ptr<Executable> executable,
      CreateExecutable(std::move(module), options.run_hlo_passes));
  return ExecuteReplicated(executable.get(), options, device_assignment);
}

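// Shared implementation for the ExecuteReplicated overloads: sets up a stream
// and run options per replica, copies each replica's arguments to its device,
// services infeed/outfeed on background threads, and invokes
// `execution_helper` to perform the actual execution before transferring the
// results back to the host.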
StatusOr<std::vector<Literal>> HloRunner::ExecuteReplicatedImpl(
    std::function<StatusOr<std::vector<ScopedShapedBuffer>>(
        const std::vector<ServiceExecutableRunOptions>&,
        const std::vector<absl::Span<const ShapedBuffer* const>>&)>
        execution_helper,
    std::function<int64_t(int64_t)> argument_count_provider,
    std::function<const Literal*(int64_t, int64_t)> argument_provider,
    const ReplicatedExecuteOptions& options,
    DeviceAssignment* device_assignment) {
  std::vector<std::unique_ptr<se::Stream>> streams;
  std::vector<ServiceExecutableRunOptions> service_run_options;
  int64_t num_partitions = device_assignment->computation_count();

  std::vector<ScopedShapedBuffer> argument_buffers;
  // This reserve() call is necessary for correctness, because
  // argument_buffer_ptrs contains pointers into the elements of
  // argument_buffers.
  const int64_t total_argument_count = [&]() {
    int64_t total = 0;
    for (int64_t i = 0; i < options.num_replicas; ++i) {
      total += argument_count_provider(i);
    }
    return total;
  }();
  argument_buffers.reserve(total_argument_count);

  // Plus one so we can safely get &argument_buffer_ptrs[0] in case there are
  // no arguments.
  std::vector<const ShapedBuffer*> argument_buffer_ptrs(total_argument_count +
                                                        1);
  std::vector<absl::Span<const ShapedBuffer* const>> argument_buffer_slices;
  int64_t index = 0;
  RunId run_id;
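  // Replica i runs on the device at row (i / num_partitions), column
  // (i % num_partitions) of the device assignment.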
  for (int64_t i = 0; i < options.num_replicas; ++i) {
    int64_t device =
        (*device_assignment)(i / num_partitions, i % num_partitions);
    TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
                        backend().stream_executor(device));
    streams.push_back(std::make_unique<se::Stream>(executor));
    streams.back()->Init();
    service_run_options.emplace_back(GetServiceRunOptionsForDevice(
        device, streams.back().get(), device_assignment, run_id));

    // Copy arguments to device.
    const int64_t argument_count = argument_count_provider(i);
    for (int64_t arg_index = 0; arg_index < argument_count; arg_index++) {
      const Literal* const argument = argument_provider(i, arg_index);
      TF_RET_CHECK(argument != nullptr);
      TF_ASSIGN_OR_RETURN(
          ScopedShapedBuffer argument_buffer,
          backend().transfer_manager()->AllocateScopedShapedBuffer(
              argument->shape(), backend().memory_allocator(), device,
              device_shape_representation_fn_));
      TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
          streams.back().get(), *argument, argument_buffer));
      argument_buffers.push_back(std::move(argument_buffer));
      argument_buffer_ptrs[index++] = &argument_buffers.back();
    }
    argument_buffer_slices.emplace_back(
        &argument_buffer_ptrs[index - argument_count], argument_count);
  }

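  // Service infeed/outfeed on background threads: one infeed thread per
  // replica when infeed values are provided, and one outfeed thread per
  // replica when an outfeed shape is provided.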
  std::unique_ptr<tensorflow::thread::ThreadPool> pool;
  TF_RET_CHECK(options.infeed_values.empty() ||
               options.infeed_values.size() == options.num_replicas);
  int64_t num_threads = options.infeed_values.size();
  if (ShapeUtil::IsInitialized(options.outfeed_shape)) {
    num_threads += options.num_replicas;
  }
  if (num_threads > 0) {
    pool = std::make_unique<tensorflow::thread::ThreadPool>(
        tensorflow::Env::Default(), "infeed_outfeed",
        /*num_threads=*/num_threads);
  }
  if (!options.infeed_values.empty()) {
    for (int64_t i = 0; i < options.num_replicas; ++i) {
      int64_t device =
          (*device_assignment)(i / num_partitions, i % num_partitions);
      pool->Schedule([this, device, &options, i]() {
        se::StreamExecutor* executor =
            backend().stream_executor(device).ValueOrDie();
        VLOG(1) << "Starting infeed on device " << device;
        for (int64_t step = 1;
             options.infeed_steps < 0 || step <= options.infeed_steps; ++step) {
          TF_CHECK_OK(backend().transfer_manager()->TransferLiteralToInfeed(
              executor, *options.infeed_values[i]));
          if (step % 100 == 0) {
            VLOG(1) << "Infeed step " << step;
          }
        }
      });
    }
  }
  if (ShapeUtil::IsInitialized(options.outfeed_shape)) {
    if (options.outfeed_values) {
      options.outfeed_values->resize(options.num_replicas);
    }
    for (int64_t i = 0; i < options.num_replicas; ++i) {
      int64_t device =
          (*device_assignment)(i / num_partitions, i % num_partitions);
      pool->Schedule([this, device, &options, i]() {
        se::StreamExecutor* executor =
            backend().stream_executor(device).ValueOrDie();
        VLOG(1) << "Starting outfeed on device " << device;
        // The outfeed loop runs for the same number of steps as the infeed.
        for (int64_t step = 1;
             options.infeed_steps < 0 || step <= options.infeed_steps; ++step) {
          Literal literal(options.outfeed_shape);
          TF_CHECK_OK(backend().transfer_manager()->TransferLiteralFromOutfeed(
              executor, &literal));
          if (options.outfeed_values) {
            options.outfeed_values->at(i) = std::move(literal);
          }
          if (step % 100 == 0) {
            VLOG(1) << "Outfeed step " << step;
          }
        }
      });
    }
  }

  LOG(INFO) << "Replicated execution started";
  TF_ASSIGN_OR_RETURN(
      std::vector<ScopedShapedBuffer> results,
      execution_helper(service_run_options, argument_buffer_slices));
  LOG(INFO) << "Replicated execution terminated";

  std::vector<Literal> exec_results;
  exec_results.reserve(options.num_replicas);
  for (int64_t i = 0; i < options.num_replicas; ++i) {
    TF_RETURN_IF_ERROR(streams[i]->BlockHostUntilDone());
    TF_ASSIGN_OR_RETURN(Literal literal,
                        backend().transfer_manager()->TransferLiteralFromDevice(
                            streams[i].get(), results[i]));
    exec_results.push_back(std::move(literal));
  }
  return std::move(exec_results);
}

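// Runs `executable` across options.num_replicas replicas, either on separate
// streams via ExecuteOnStreams or, when options.use_threads is set, on one
// thread per replica.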
StatusOr<std::vector<Literal>> HloRunner::ExecuteReplicated(
    Executable* executable, const ReplicatedExecuteOptions& options,
    DeviceAssignment* device_assignment, ExecutionProfile* profile) {
  return ExecuteReplicatedImpl(
      [&](const std::vector<ServiceExecutableRunOptions>& service_run_options,
          const std::vector<absl::Span<const ShapedBuffer* const>>&
              argument_buffer_slices)
          -> StatusOr<std::vector<ScopedShapedBuffer>> {
        std::vector<ScopedShapedBuffer> results;
        if (!options.use_threads) {
          TF_ASSIGN_OR_RETURN(
              results, executable->ExecuteOnStreams(service_run_options,
                                                    argument_buffer_slices));
        } else {
          absl::Mutex mutex;
          std::vector<StatusOr<ScopedShapedBuffer>> thread_results(
              options.num_replicas);
          {
            LOG(INFO) << "Creating thread pool for " << options.num_replicas
                      << " replicas";
            tensorflow::thread::ThreadPool pool(
                tensorflow::Env::Default(), "replicas", options.num_replicas);
            for (int64_t i = 0; i < options.num_replicas; ++i) {
              pool.Schedule([&, i] {
                auto result = executable->ExecuteOnStream(
                    &service_run_options[i], argument_buffer_slices[i],
                    nullptr);
                absl::MutexLock lock(&mutex);
                thread_results[i] = std::move(result);
              });
            }

            // Note: the thread pool destructor guarantees it completes all
            // work before we leave this scope.
          }
          for (auto& thread_result : thread_results) {
            if (!thread_result.ok()) {
              return thread_result.status();
            }
            results.push_back(std::move(thread_result).ValueOrDie());
          }
        }
        return results;
      },
      [&](int64_t replica) { return options.arguments.size(); },
      [&](int64_t replica, int64_t index) { return options.arguments[index]; },
      options, device_assignment);
}

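// Variant that looks up a (possibly distinct) executable for each replica.
// Requires options.use_threads, since each replica may run a different
// executable.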
StatusOr<std::vector<Literal>> HloRunner::ExecuteReplicated(
    std::function<Executable*(int64_t)> executable_provider,
    std::function<int64_t(int64_t)> argument_count_provider,
    std::function<const Literal*(int64_t, int64_t)> argument_provider,
    const ReplicatedExecuteOptions& options,
    DeviceAssignment* device_assignment) {
  DeviceAssignment computation_device_assignment;
  if (device_assignment == nullptr) {
    TF_ASSIGN_OR_RETURN(
        computation_device_assignment,
        backend().computation_placer()->AssignDevices(options.num_replicas, 1));
    device_assignment = &computation_device_assignment;
  }
  CHECK_NE(device_assignment, nullptr);
  return ExecuteReplicatedImpl(
      [&](const std::vector<ServiceExecutableRunOptions>& service_run_options,
          const std::vector<absl::Span<const ShapedBuffer* const>>&
              argument_buffer_slices)
          -> StatusOr<std::vector<ScopedShapedBuffer>> {
        TF_RET_CHECK(options.use_threads);
        std::vector<ScopedShapedBuffer> results;
        absl::Mutex mutex;
        std::vector<StatusOr<ScopedShapedBuffer>> thread_results(
            options.num_replicas);
        {
          LOG(INFO) << "Creating thread pool for " << options.num_replicas
                    << " replicas";
          tensorflow::thread::ThreadPool pool(tensorflow::Env::Default(),
                                              "replicas", options.num_replicas);
          for (int64_t i = 0; i < options.num_replicas; ++i) {
            for (const auto& arg : argument_buffer_slices[i]) {
              TF_RET_CHECK(arg != nullptr);
            }
            pool.Schedule([&, i] {
              auto result = executable_provider(i)->ExecuteOnStream(
                  &service_run_options[i], argument_buffer_slices[i], nullptr);
              absl::MutexLock lock(&mutex);
              thread_results[i] = std::move(result);
            });
          }

          // Note: the thread pool destructor guarantees it completes all work
          // before we leave this scope.
        }
        for (auto& thread_result : thread_results) {
          if (!thread_result.ok()) {
            return thread_result.status();
          }
          results.push_back(std::move(thread_result).ValueOrDie());
        }
        return results;
      },
      argument_count_provider, argument_provider, options, device_assignment);
}

StatusOr<std::vector<Literal>> HloRunner::ExecuteReplicated(
    std::unique_ptr<HloModule> module,
    const ReplicatedExecuteOptions& options) {
  TF_ASSIGN_OR_RETURN(
      DeviceAssignment device_assignment,
      backend().computation_placer()->AssignDevices(options.num_replicas, 1));
  return ExecuteReplicated(std::move(module), options, &device_assignment);
}

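// Compiles `module` into an Executable. When run_hlo_passes is true, the full
// HLO pass pipeline runs before code generation; otherwise the module is
// handed to the backend as-is.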
StatusOr<std::unique_ptr<Executable>> HloRunner::CreateExecutable(
    std::unique_ptr<HloModule> module, bool run_hlo_passes) {
  xla::UpdateEntryComputationLayout(module.get(),
                                    device_shape_representation_fn_);
  if (run_hlo_passes) {
    auto module_group = std::make_unique<HloModuleGroup>(std::move(module));
    TF_ASSIGN_OR_RETURN(
        auto executables,
        backend().compiler()->Compile(std::move(module_group),
                                      {{backend().default_stream_executor()}},
                                      backend().memory_allocator()));
    return std::move(executables[0]);
  }
  return backend().compiler()->RunBackend(std::move(module),
                                          backend().default_stream_executor(),
                                          backend().memory_allocator());
}

ServiceExecutableRunOptions HloRunner::GetServiceRunOptionsForDevice(
    int64_t device, se::Stream* stream, DeviceAssignment* device_assignment,
    RunId run_id) {
  ExecutableRunOptions run_options;
  run_options.set_device_ordinal(device);
  run_options.set_stream(stream);
  run_options.set_allocator(backend().memory_allocator());
  run_options.set_intra_op_thread_pool(
      backend().eigen_intra_op_thread_pool_device());
  if (device_assignment != nullptr) {
    run_options.set_device_assignment(device_assignment);
  }
  run_options.set_run_id(run_id);
  return ServiceExecutableRunOptions(run_options, backend().StreamBorrower());
}

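// Lazily creates a default backend if none has been created yet.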
Backend& HloRunner::backend() {
  if (!backend_) {
    backend_ = Backend::CreateDefaultBackend().value();
    VLOG(1) << "Executing on platform " << backend().platform()->Name();
  }
  return *backend_;
}

const Backend& HloRunner::backend() const {
  return const_cast<HloRunner*>(this)->backend();
}

absl::string_view HloRunner::Name() const {
  return backend_->platform()->Name();
}

}  // namespace xla