/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

// TODO(b/199402574): rename to tflite_internal or just remove internal
// namespace.
namespace internal {

// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// returning buffers on TfLiteEvalTensor.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
    IPersistentBufferAllocator* persistent_buffer_allocator,
    INonPersistentBufferAllocator* non_persistent_buffer_allocator,
    bool allocate_temp, const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result);

// Holds placeholder information for a scratch buffer request from a kernel.
// This struct is only used during the model prepare stage. Each request from a
// kernel is stored in the head section. During the prepare stage, the head
// section will hold at least kMaxScratchBuffersPerOp requests plus any
// requests made by previously prepared kernels.
//
// When the memory plan is finalized, these structs are no longer used in favor
// of a sequential array of ScratchBufferHandle allocations in the tail
// section. These allocations are indexed by the request API defined in the
// TfLiteContext struct.
typedef struct {
  // Number of bytes required by the buffer. The actual allocated size might be
  // greater than `bytes` due to buffer alignment.
  size_t bytes;
  // Node for which the buffer is allocated. This provides useful information
  // to determine the lifetime of the buffer. In AllocationInfo, this buffer
  // will have `before` = node_idx and `after` = node_idx.
  int node_idx;
  int subgraph_idx;
} ScratchBufferRequest;

}  // namespace internal

typedef struct {
  TfLiteNode node;
  const TfLiteRegistration* registration;
} NodeAndRegistration;

// Holds a pointer to a buffer for a scratch buffer requested by a kernel
// during the model prepare stage. This struct is allocated in-place and allows
// for quick pointer-indexed lookup for speed during model inference.
typedef struct {
  // Pointer to the location of the scratch buffer:
  uint8_t* data;
} ScratchBufferHandle;

// Stores all per-subgraph allocations. This includes the node and registration
// array, tensor list and scratch buffer handles for each subgraph.
typedef struct {
  NodeAndRegistration* node_and_registrations;
  TfLiteEvalTensor* tensors;
} SubgraphAllocations;

// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to stand up a model for inference in TF Micro. This class currently relies
// on an additional allocator - SimpleMemoryAllocator - for all allocations
// from an arena. These allocations are divided into head (non-persistent) and
// tail (persistent) regions:
//
// Memory layout to help understand how it works.
// This information could change in future versions.
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
//                                               - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
 public:
  // Creates a MicroAllocator instance from a given tensor arena. This arena
  // will be managed by the created instance. The GreedyMemoryPlanner will by
  // default be used and created on the arena.
  // Note: Please use alignas(16) to make sure tensor_arena is 16-byte
  // aligned, otherwise some head room will be wasted.
  // TODO(b/157615197): Cleanup constructor + factory usage.
  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
                                ErrorReporter* error_reporter);
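
  // A minimal usage sketch (illustrative only; the arena size and the use of
  // MicroErrorReporter are assumptions, not requirements of this API):
  //
  //   alignas(16) static uint8_t tensor_arena[16 * 1024];
  //   static tflite::MicroErrorReporter micro_error_reporter;
  //   tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
  //       tensor_arena, sizeof(tensor_arena), &micro_error_reporter);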

  // Creates a MicroAllocator instance from a given tensor arena and a given
  // MemoryPlanner. This arena will be managed by the created instance. Note:
  // Please use alignas(16) to make sure tensor_arena is 16-byte aligned,
  // otherwise some head room will be wasted.
  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
                                MicroMemoryPlanner* memory_planner,
                                ErrorReporter* error_reporter);

  // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
  // instance and the MemoryPlanner. This allocator instance will use the
  // SimpleMemoryAllocator instance to manage allocations internally.
  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
                                MicroMemoryPlanner* memory_planner,
                                ErrorReporter* error_reporter);

  // Returns the fixed amount of memory overhead of MicroAllocator.
  static size_t GetDefaultTailUsage(bool is_memory_planner_given);

  // Allocates internal resources required for model inference for each
  // subgraph from the arena.
  //
  // This method will run through the flatbuffer data supplied in the model to
  // properly allocate tensor, node, and op registration data. This method is
  // expected to be followed with a call to FinishModelAllocation(). Returns a
  // pointer to an array of SubgraphAllocations (also stored in the tail of the
  // arena) where each index corresponds to a different subgraph in the model.
  // The return value is nullptr if the allocations failed.
  SubgraphAllocations* StartModelAllocation(const Model* model);

  // Finishes allocating internal resources required for model inference.
  //
  // - Plans the memory for activation tensors and scratch buffers.
  // - Updates eval tensors for each subgraph based on planned offsets.
  // - Allocates the scratch buffer handles array and updates it based on
  //   planned offsets.
  //
  // This method should be called after assigning model resources in
  // StartModelAllocation(). The subgraph_allocations pointer should be the
  // value passed into this class during StartModelAllocation(). Scratch buffer
  // handles are stored in the out-param `scratch_buffer_handles` array, which
  // is allocated in this method. This value will be used in `GetScratchBuffer`
  // calls to retrieve scratch buffers.
  TfLiteStatus FinishModelAllocation(
      const Model* model, SubgraphAllocations* subgraph_allocations,
      ScratchBufferHandle** scratch_buffer_handles);
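
  // A hedged sketch of the typical allocation flow (normally driven by the
  // interpreter rather than called directly; `allocator` and `model` are
  // assumed to come from Create() and an already-parsed flatbuffer):
  //
  //   tflite::SubgraphAllocations* allocations =
  //       allocator->StartModelAllocation(model);
  //   // ... kernel init/prepare runs here, requesting scratch buffers ...
  //   tflite::ScratchBufferHandle* scratch_handles = nullptr;
  //   TfLiteStatus status = allocator->FinishModelAllocation(
  //       model, allocations, &scratch_handles);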

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // persistent arena memory and is only guaranteed for the lifetime of the
  // application. The subgraph_allocations pointer should be the value passed
  // into this class during StartModelAllocation() and contains the
  // source-of-truth for buffers.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
      const Model* model, const SubgraphAllocations* subgraph_allocations,
      int tensor_index, int subgraph_index);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // temporary arena memory and is only guaranteed until a call is made to
  // ResetTempAllocations(). subgraph_allocations contains the array of
  // TfLiteEvalTensors. If the newly allocated temp tensor at the specified
  // subgraph and tensor index is already present in the TfLiteEvalTensor
  // array, its data buffer will be re-used.
  virtual TfLiteTensor* AllocateTempTfLiteTensor(
      const Model* model, const SubgraphAllocations* subgraph_allocations,
      int tensor_index, int subgraph_index);

  virtual void DeallocateTempTfLiteTensor(TfLiteTensor*);

  // Resets all temporary allocations. This method should be called after a
  // chain of temp allocations (e.g. a chain of TfLiteTensor objects allocated
  // via AllocateTempTfLiteTensor()).
  virtual TfLiteStatus ResetTempAllocations();

  // Returns true if all temporary buffers, including temp TfLiteTensors, have
  // already been deallocated.
  virtual bool IsAllTempDeallocated();
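
  // A hedged sketch of the temp-allocation pattern (local variable names are
  // illustrative; `model` and `allocations` come from StartModelAllocation()
  // as in the sketch above):
  //
  //   TfLiteTensor* temp = allocator->AllocateTempTfLiteTensor(
  //       model, allocations, /*tensor_index=*/0, /*subgraph_index=*/0);
  //   // ... inspect temp->dims, temp->type, quantization params, etc. ...
  //   allocator->DeallocateTempTfLiteTensor(temp);
  //   allocator->ResetTempAllocations();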

  // Allocates a persistent buffer which has the same lifetime as the
  // allocator. The memory is immediately available and is allocated from the
  // tail of the arena.
  virtual void* AllocatePersistentBuffer(size_t bytes);

  // Registers a scratch buffer of size `bytes` for the node with `node_id`.
  // This method only requests a buffer with a given size to be used after a
  // model has finished allocation via FinishModelAllocation(). All requested
  // buffers will be accessible by the out-param in that method.
  TfLiteStatus RequestScratchBufferInArena(size_t bytes, int subgraph_idx,
                                           int* buffer_idx);
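
  // A hedged sketch of how kernels typically reach this request path through
  // the TfLiteContext request API mentioned above (`scratch_bytes` and
  // `scratch_index` are illustrative names, not part of this header):
  //
  //   // In a kernel's Prepare function:
  //   int scratch_index = -1;
  //   TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
  //       context, scratch_bytes, &scratch_index));
  //
  //   // In the kernel's Eval function, after FinishModelAllocation():
  //   void* scratch = context->GetScratchBuffer(context, scratch_index);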

  // Finish allocating a specific NodeAndRegistration prepare block (kernel
  // entry for a model) with a given node ID. This call ensures that any
  // scratch buffer requests and temporary allocations are handled and ready
  // for the next node prepare block.
  TfLiteStatus FinishPrepareNodeAllocations(int node_id);

  // Returns the arena usage in bytes, only available after
  // `FinishModelAllocation`. Otherwise, it will return 0.
  size_t used_bytes() const;

  BuiltinDataAllocator* GetBuiltinDataAllocator();

 protected:
  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
                 MicroMemoryPlanner* memory_planner,
                 ErrorReporter* error_reporter);
  virtual ~MicroAllocator();

  // Allocates an array in the arena to hold pointers to the node and
  // registration pointers required to represent the inference graph of the
  // model.
  virtual TfLiteStatus AllocateNodeAndRegistrations(
      const Model* model, SubgraphAllocations* subgraph_allocations);

  // Allocates the list of persistent TfLiteEvalTensors that are used for the
  // "eval" phase of model inference. These structs will be the source of truth
  // for all tensor buffers.
  virtual TfLiteStatus AllocateTfLiteEvalTensors(
      const Model* model, SubgraphAllocations* subgraph_allocations);

  // Allocates persistent tensor buffers for variable tensors in the subgraph.
  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
                                         TfLiteEvalTensor* eval_tensors);

  // Allocate and return a persistent TfLiteTensor.
  // TODO(b/162311891): Drop this method when the interpreter has an API for
  // accessing TfLiteEvalTensor structs.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal();

  // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
  // quantization data is allocated from either the tail (persistent) or temp
  // sections of the arena based on the allocation flag.
  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
                                                          TfLiteTensor* tensor,
                                                          int tensor_index,
                                                          int subgraph_idx,
                                                          bool allocate_temp);

  ErrorReporter* error_reporter() const;

 private:
  // Commits a memory plan for all non-persistent buffer allocations in the
  // 'head' section of the memory arena. The eval_tensors pointer is the list
  // of pre-allocated TfLiteEvalTensor structs that will point to the buffers
  // that will be allocated into the head section in this function call. The
  // scratch_buffer_handles pointer is the array of pre-allocated
  // ScratchBufferHandle structs that will point to allocated buffers also in
  // the head section.
  virtual TfLiteStatus CommitStaticMemoryPlan(
      const Model* model, SubgraphAllocations* allocations,
      ScratchBufferHandle* scratch_buffer_handles);

  // Allocates an array of ScratchBufferHandle structs in the tail section for
  // a given number of handles.
  virtual TfLiteStatus AllocateScratchBufferHandles(
      ScratchBufferHandle** scratch_buffer_handles, size_t handle_count);

  // Clears all internal scratch buffer request counts and resets the head to
  // prepare for kernels to request scratch buffer data when a model is
  // preparing.
  TfLiteStatus InitScratchBufferData();

  // Returns the pointer for the array of ScratchBufferRequest allocations in
  // the head section.
  internal::ScratchBufferRequest* GetScratchBufferRequests();

  // Simple memory allocators that always allocate from the arena tail or head.
  INonPersistentBufferAllocator* non_persistent_buffer_allocator_;
  IPersistentBufferAllocator* persistent_buffer_allocator_;

  // Allocator used to allocate persistent builtin data.
  BuiltinDataAllocator* builtin_data_allocator_;

  // Activation buffer memory planner.
  MicroMemoryPlanner* memory_planner_;

  ErrorReporter* error_reporter_;

  bool model_is_allocating_;

  // Holds the number of ScratchBufferRequest instances stored in the head
  // section when a model is allocating.
  size_t scratch_buffer_request_count_ = 0;

  // Holds ScratchBufferRequests when a model is allocating.
  uint8_t* scratch_buffer_head_ = nullptr;

  // Holds the byte length of the memory plan with the largest head usage. Used
  // to ensure that multi-tenant allocations can share the head for buffers.
  size_t max_head_buffer_usage_ = 0;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_