// Copyright (c) Facebook, Inc. and its affiliates. // All rights reserved. // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #pragma once #include #include "minpybind.h" #ifdef _WIN32 #include // https://stackoverflow.com/questions/355967/how-to-use-msvc-intrinsics-to-get-the-equivalent-of-this-gcc-code inline unsigned int __builtin_clz(unsigned int x) { unsigned long r = 0; _BitScanReverse(&r, x); return (31 - r); } #endif inline int round2min8(int num) { int nzeros = __builtin_clz((num - 1)|4); return 1 << (32 - nzeros); } struct Arena; template struct OwnedSlice; template struct Slice { Slice() : begin_(nullptr), size_(0), capacity_(0) {} template Slice(Arena& arena, Args&&... args); T* begin() const { return begin_; } T* end() const { return begin_ + size_; } int size() const { return size_; } int capacity() const { return capacity_; } T& back(int i=-1) { return begin_[size_ + i]; } T& operator[](int i) const { return begin_[i]; } std::optional index(const T& value) { for (int i : enumerate()) { if (begin_[i] == value) { return i; } } return std::nullopt; } bool contains(const T& value) { return index(value).has_value(); } void insert(Arena& arena, Slice where, Slice to_insert); void insert(Arena& arena, Slice where, T v) { return insert(arena, where, Slice(&v, &v + 1)); } void insert(Arena& arena, int where, T v) { return insert(arena, slice(where, where), v); } void append(Arena& arena, T value); void extend(Arena& arena, Slice to_insert); void extend(Arena& arena, const T* begin, const T* end) { return extend(arena, Slice((T*)begin, (T*)end)); } bool remove(Arena& A, T value) { auto idx = index(value); if (idx) { insert(A, slice(*idx, *idx + 1), Slice()); } return idx.has_value(); } Slice slice(int begin) { return slice(begin, size_); } Slice slice(int begin, int end) { if (begin < 0) { begin += size_; } if (end < 0) { end += size_; } Slice result; result.begin_ = begin_ + begin; result.size_ = end - begin; result.capacity_ = result.size_; return result; } bool inside(Slice where) { return begin() <= where.begin() && where.end() <= end(); } irange enumerate() const { return irange(size_); } irange reversed_enumerate() const { return irange(size_ - 1, -1, -1); } bool operator==(const Slice& rhs) const { if (size() != rhs.size()) { return false; } return std::equal(begin(), end(), rhs.begin()); } Slice(T* begin, T* end) : begin_(begin), size_(end - begin), capacity_(size_) {} protected: static int _length(const T& t) { return 1; } static int _length(Slice t) { return t.size_; } static T* _insert(T*& dst, T t) { *dst = std::move(t); return ++dst; } static T* _insert(T*& dst, Slice t) { std::memcpy(dst, t.begin_, sizeof(T)*t.size_); dst += t.size_; return dst; } T* begin_; int size_; int capacity_; friend struct OwnedSlice; }; template struct OwnedSlice { typedef void (*deleter_t)(Slice); static void _no_delete(Slice) {} OwnedSlice() : deleter_(_no_delete) {} OwnedSlice(const OwnedSlice&) = delete; OwnedSlice& operator=(const OwnedSlice&) = delete; ~OwnedSlice() { deleter_(slice_); if (slice_.size_ > 8) { delete [] slice_.begin_; } } void set(Slice to_own, deleter_t deleter = _no_delete) { slice_.size_ = slice_.capacity_ = to_own.size(); slice_.begin_ = (slice_.size_ > 8) ? new T[slice_.size_] : &small_buf[0]; std::memcpy(slice_.begin_, to_own.begin(), slice_.size_ * sizeof(T)); deleter_ = deleter; } Slice slice() const { return slice_; } private: Slice slice_; deleter_t deleter_; T small_buf[8]; }; template inline std::ostream& operator<<(std::ostream& s, const Slice& v) { s << "["; for (int i : v.enumerate()) { if (i > 0) { s << ", "; } s << v[i]; } s << "]"; return s; } struct TensorRef { TensorRef() : impl_(nullptr){} TensorRef(const at::Tensor& t) : impl_(t.unsafeGetTensorImpl()) {} const at::Tensor& operator*() const { return *(at::Tensor*)this; } at::Tensor* operator->() const { return (at::Tensor*)this; } operator bool() const { return impl_ != nullptr; } private: at::TensorImpl* impl_; }; constexpr int ARENA_MAX_SIZE = 4096; constexpr int ALIGNMENT = 8; struct Arena { Arena() : allocated_(0) {} template T* allocate(int n) { if (!n) { return nullptr; } int to_allocate = sizeof(T)*n; int to_allocate_rounded = ALIGNMENT * ((to_allocate - 1) / ALIGNMENT + 1); auto prev_allocated = allocated_; allocated_ += to_allocate_rounded; if (C10_UNLIKELY_OR_CONST(allocated_ > ARENA_MAX_SIZE)) { overflow_.emplace_back(new char[to_allocate]); return (T*) &overflow_.back()[0]; } return (T*) (buffer_ + prev_allocated); } TensorRef autorelease(at::Tensor s) { auto ref = TensorRef(s); s.unsafeReleaseTensorImpl(); ar_tensors_.append(*this, ref); return ref; } mpy::handle autorelease(mpy::object obj) { ar_objects_.append(*this, obj); obj.release(); return ar_objects_.back(); } ~Arena() { for(TensorRef t: ar_tensors_) { c10::intrusive_ptr::reclaim(t->unsafeGetTensorImpl()); } for(mpy::handle h: ar_objects_) { mpy::object::steal(h); } } private: int64_t allocated_; char buffer_[ARENA_MAX_SIZE]; Slice ar_tensors_; Slice ar_objects_; std::vector> overflow_; }; template inline void Slice::insert(Arena& arena, Slice where, Slice to_insert) { AT_ASSERT(inside(where)); Slice result = *this; /// b------sb---se-----e, 0----n T* body_dest = where.begin(); if (where.size() != to_insert.size()) { int new_size = size() - where.size() + to_insert.size(); T* tail_dest = where.begin() + to_insert.size(); if (new_size >= capacity_) { int new_capacity = new_size ? round2min8(new_size) : 0; result.capacity_ = new_capacity; result.begin_ = arena.allocate(new_capacity); body_dest = result.begin_ + (where.begin() - begin()); tail_dest = body_dest + to_insert.size(); //std::memcpy(result.begin_, begin_, sizeof(T)*(where.begin() - begin())); std::copy(begin_, begin_ + (where.begin() - begin()), result.begin_); } std::memmove(tail_dest, where.end(), sizeof(T)*(end() - where.end())); result.size_ = new_size; } //std::memcpy(body_dest, to_insert.begin(), sizeof(T)*to_insert.size()); std::copy(to_insert.begin(), to_insert.end(), body_dest); *this = result; } template inline void Slice::append(Arena& arena, T value) { Slice result = *this; if (size_ == capacity_) { int new_size = size_ ? round2min8(size_)*2 : 8; T* n = arena.allocate(new_size); //memcpy(n, begin_, size_*sizeof(T)); std::copy(begin_, begin_ + size_, n); result.begin_ = n; result.capacity_ = new_size; } result[result.size_++] = std::move(value); *this = result; } template inline void Slice::extend(Arena& arena, Slice rhs) { Slice result = *this; result.size_ = size_ + rhs.size(); if (result.size_ > capacity_) { int new_size = round2min8(result.size_); T* n = arena.allocate(new_size); //memcpy(n, begin_, size_*sizeof(T)); std::copy(begin_, begin_+size_, n); result.begin_ = n; result.capacity_ = new_size; } //memcpy(result.begin_ + size_, rhs.begin(), sizeof(T)*rhs.size()); std::copy(rhs.begin(), rhs.end(), result.begin_ + size_); *this = result; } template template Slice::Slice(Arena& arena, Args&&... args) { int lens[] = {_length(args)...}; size_ = 0; for (auto i : lens) { size_ += i; } capacity_ = size_ ? round2min8(size_) : 0; begin_ = arena.allocate(capacity_); T* dst_ = begin_; T* unused[] = {_insert(dst_, args)...}; (void) unused; }