c10/mobile/CPUProfilingAllocator.h

#pragma once

#include <c10/macros/Export.h>
#include <c10/util/flat_hash_map.h>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

namespace c10 {

/*
 * Given a sequence of allocations in a thread, AllocationPlan records
 * 1. size of each allocation
 * 2. Lifetime of each allocation.
 * 3. allocation offsets: Memory offset for each allocation in a single blob of
 * memory
 * 4. Total size of a blob of memory required to satisfy all the allocations.
 */
class C10_API AllocationPlan {
 private:
  // Records size of each allocation by their sequential allocation ids.
  std::vector<uint64_t> allocation_sizes;
  // This maps one allocation id (X) to another allocation id (Y).
  // Allocation X is alive until allocation Y. From allocation Y onwards
  // allocation X is not referenced.
  // Thus Y is the id of the first allocation after X is freed.
  // NB: When an allocation is recorded, along with recording its size,
  // we also set the lifetime to be numeric_limits::max()
  // This is to track allocations that are made during the scope of
  // profiling but were not freed until after the scope ended.
  // Such allocations are not managed by profiling allocator.
  std::vector<uint64_t> allocation_lifetimes;
  // Maps an allocation to some offset in a blob of memory.
  std::vector<uint64_t> allocation_offsets;
  uint64_t total_size{0};
  void clear();
  friend class AllocationPlanner;
  friend class CPUProfilingAllocator;
};

/*
 * Map of memory ptr to allocation id. This is auxiliary information only
 * used to establish lifetime of allocations.
 */
class C10_API AllocationPlanner {
 private:
  AllocationPlan* allocation_plan_{nullptr};
  // Maps allocated ptr to its allocation id.
  // This is used when freeing the memory to look up the allocation id
  // in order to establish the lifetime of a particular allocation.
  ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
  uint64_t allocation_id_{0};
  bool validation_mode_{false};

  bool validate_allocation(const uint64_t size, const void* ptr);
  bool validate_free(const void* ptr);

 public:
  bool validation_success{true};

  AllocationPlanner() = delete;
  AllocationPlanner(AllocationPlan* plan, bool validate = false)
      : allocation_plan_(plan), validation_mode_(validate) {}
  void record_allocation(const uint64_t size, const void* ptr);
  void record_free(const void* ptr);
  void formulate_plan();
  void clear();
};

// NOT THREAD SAFE profiling allocator.
class C10_API CPUProfilingAllocator {
 private:
  const AllocationPlan* plan_{nullptr};
  uint64_t allocation_id_{0};
  uint64_t current_size_{0};
  void* blob_{nullptr};
  ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;

 public:
  ~CPUProfilingAllocator();
  void set_plan(const AllocationPlan* plan);
  void unset_plan();
  void* allocate(const size_t bytes);
  void free(void* const ptr);
};

/*
 * Usage: Profile allocations made by one run of the model.
 * AllocationPlan plan;
 * {
 *   WithProfileAllocationGuard profile_guard(&plan);
 *   module.forward(...);
 * }
 * plan now contains allocation plan.
 */
class C10_API WithProfileAllocationsGuard {
 public:
  WithProfileAllocationsGuard(AllocationPlan* plan);
  ~WithProfileAllocationsGuard();

 private:
  std::unique_ptr<AllocationPlanner> planner_;
};

/*
 * Usage: Validate allocation plan made with WithProfileAllocationGuard
 * bool plan_validation_success, success = true;
 * for (some number of representative inputs)
 * {
 *   WithValidateAllocationPlanGuard(&plan, &plan_validation_success);
 *   module.forward(...);
 *   success = success && plan_validation_success;
 * }
 * success == true means allocations are according to plan
 * else for some inputs allocation pattern changed.
 */
class C10_API WithValidateAllocationPlanGuard {
 public:
  WithValidateAllocationPlanGuard(AllocationPlan* plan, bool* success);
  ~WithValidateAllocationPlanGuard();

 private:
  std::unique_ptr<AllocationPlanner> planner_;
  bool* success_;
};

AllocationPlanner* GetThreadLocalAllocationPlanner();

/*
 * Usage: Allocate tensors accordingly to allocation plan
 * First make allocation plan.
 *  See WithProfileAllocationsGuard usage.
 * Second validate allocation plan.
 *  See WithValidateAllocationPlanGuard usage.
 * CPUProfilingAllocator profiling_allocator;
 * {
 *   WithProfilingAllocatorGuard allocator_guard(&profiling_allocator, &plan);
 *   module.forward(...);
 * }
 */
class C10_API WithProfilingAllocatorGuard {
 public:
  WithProfilingAllocatorGuard(
      CPUProfilingAllocator* allocator,
      const AllocationPlan* plan);
  ~WithProfilingAllocatorGuard();
};

CPUProfilingAllocator* GetThreadLocalProfilingAllocator();

} // namespace c10