micro_allocator.cc

/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/micro_allocator.h"

#include <cstddef>
#include <cstdint>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
#include "tensorflow/lite/micro/micro_allocation_info.h"
#include "tensorflow/lite/micro/micro_arena_constants.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/schema/schema_utils.h"

namespace tflite {
namespace {

// Maximum number of scratch buffer requests per operator. Operator kernels
// that request more than this value will receive an error.
constexpr size_t kMaxScratchBuffersPerOp = 12;

// Sentinel value used as a placeholder to mark that a ScratchBufferRequest
// still needs a node id assignment.
constexpr int kUnassignedScratchBufferRequestIndex = -1;

const TfLiteIntArray kZeroLengthIntArray = {};

class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
 public:
  explicit MicroBuiltinDataAllocator(
      IPersistentBufferAllocator* persistent_allocator)
      : persistent_allocator_(persistent_allocator) {}

  void* Allocate(size_t size, size_t alignment_hint) override {
    return persistent_allocator_->AllocatePersistentBuffer(size,
                                                           alignment_hint);
  }
  void Deallocate(void* data) override {
    // Do not deallocate; builtin data needs to be available for the lifetime
    // of the model.
  }

  TF_LITE_REMOVE_VIRTUAL_DELETE

 private:
  IPersistentBufferAllocator* persistent_allocator_;
};

TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
                        MicroMemoryPlanner* planner,
                        const AllocationInfo* allocation_info,
                        size_t allocation_info_size) {
  // Add the tensors to our allocation plan.
  for (size_t i = 0; i < allocation_info_size; ++i) {
    const AllocationInfo* current = &allocation_info[i];
    if (current->needs_allocating) {
      size_t aligned_bytes_required =
          AlignSizeUp(current->bytes, MicroArenaBufferAlignment());
      if (current->offline_offset == kOnlinePlannedBuffer) {
        TF_LITE_ENSURE_STATUS(
            planner->AddBuffer(error_reporter, aligned_bytes_required,
                               current->first_created, current->last_used));
      } else {
        TF_LITE_ENSURE_STATUS(planner->AddBuffer(
            error_reporter, aligned_bytes_required, current->first_created,
            current->last_used, current->offline_offset));
      }
    }
  }
  return kTfLiteOk;
}

TfLiteStatus CommitPlan(ErrorReporter* error_reporter,
                        MicroMemoryPlanner* planner, uint8_t* starting_point,
                        const AllocationInfo* allocation_info,
                        size_t allocation_info_size) {
  // Figure out the actual memory addresses for each buffer, based on the plan.
  int planner_index = 0;
  for (size_t i = 0; i < allocation_info_size; ++i) {
    const AllocationInfo* current = &allocation_info[i];
    if (current->needs_allocating) {
      int offset = -1;
      TF_LITE_ENSURE_STATUS(
          planner->GetOffsetForBuffer(error_reporter, planner_index, &offset));
      *current->output_ptr = reinterpret_cast<void*>(starting_point + offset);
      ++planner_index;
    }
  }
  return kTfLiteOk;
}

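// Illustrative sketch: CreatePlan() and CommitPlan() are always used as a
// pair by CommitStaticMemoryPlan() further down in this file. First every
// buffer's size and lifetime is registered with the planner, then the
// planner's computed offsets are turned into real pointers inside the
// non-persistent (overlay) section of the arena. The names
// `scratch_for_planner`, `scratch_size`, `overlay_start`, `allocation_info`
// and `info_count` below are assumptions standing in for values the caller
// provides:
//
//   GreedyMemoryPlanner planner;
//   planner.Init(scratch_for_planner, scratch_size);
//   TF_LITE_ENSURE_STATUS(
//       CreatePlan(error_reporter, &planner, allocation_info, info_count));
//   TF_LITE_ENSURE_STATUS(
//       CommitPlan(error_reporter, &planner, overlay_start, allocation_info,
//                  info_count));
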
IPersistentBufferAllocator* CreatePersistentArenaAllocator(uint8_t* buffer_head,
                                                           size_t buffer_size) {
  // Align the end of the usable area at the tail, because persistent
  // allocations grow from the tail of the arena towards the head.
  uint8_t* aligned_buffer_tail =
      AlignPointerDown(buffer_head + buffer_size, MicroArenaBufferAlignment());
  size_t aligned_buffer_size = aligned_buffer_tail - buffer_head;
  PersistentArenaBufferAllocator tmp =
      PersistentArenaBufferAllocator(buffer_head, aligned_buffer_size);

  // Allocate enough bytes from the buffer to create a
  // PersistentArenaBufferAllocator. The new instance will use the current
  // adjusted tail buffer from the tmp allocator instance.
  uint8_t* allocator_buffer =
      tmp.AllocatePersistentBuffer(sizeof(PersistentArenaBufferAllocator),
                                   alignof(PersistentArenaBufferAllocator));
  // Use the default copy constructor to populate internal states.
  return new (allocator_buffer) PersistentArenaBufferAllocator(tmp);
}

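// Illustrative note: the function above uses a common TFLM "bootstrap"
// pattern. A temporary allocator on the stack carves out space for the real
// allocator object, which is then placement-new'ed into the very arena it
// will manage. A minimal sketch of the same idea, with `MyArenaAllocator` as
// a hypothetical stand-in type:
//
//   MyArenaAllocator tmp(arena, arena_size);
//   uint8_t* storage = tmp.AllocatePersistentBuffer(sizeof(MyArenaAllocator),
//                                                   alignof(MyArenaAllocator));
//   MyArenaAllocator* allocator = new (storage) MyArenaAllocator(tmp);
//
// Copy-constructing from `tmp` keeps the bookkeeping (current head/tail) that
// already accounts for the space taken by the allocator object itself.
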
// A NonPersistentArenaBufferAllocator instance is created in the persistent
// buffer because it has to be persistent to keep track of the non-persistent
// buffer information.
INonPersistentBufferAllocator* CreateNonPersistentArenaAllocator(
    uint8_t* buffer_head, size_t buffer_size,
    IPersistentBufferAllocator* persistent_buffer_allocator) {
  uint8_t* allocator_buffer =
      persistent_buffer_allocator->AllocatePersistentBuffer(
          sizeof(NonPersistentArenaBufferAllocator),
          alignof(NonPersistentArenaBufferAllocator));
  // Align the start of the usable area at the head, because non-persistent
  // allocations grow from the head of the arena towards the tail.
  uint8_t* aligned_buffer_head =
      AlignPointerUp(buffer_head, MicroArenaBufferAlignment());
  size_t aligned_buffer_size = buffer_head + buffer_size - aligned_buffer_head;

  INonPersistentBufferAllocator* non_persistent_buffer_allocator =
      new (allocator_buffer) NonPersistentArenaBufferAllocator(
          aligned_buffer_head, aligned_buffer_size);
  return non_persistent_buffer_allocator;
}

}  // namespace

namespace internal {

// Returns a pointer to any buffer associated with the flatbuffer tensor. Can
// return nullptr if no buffer is found.
void* GetFlatbufferTensorBuffer(
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers) {
  // We need to figure out where the actual contents of this tensor are stored
  // in memory. We'll check to see if there's a serialized buffer (pretty much
  // the same as a constant op in TensorFlow) associated with this tensor
  // first, and if there is, update the runtime structure to point to its
  // location in memory.
  // First see if there's any buffer information in the serialized tensor.
  // TODO(b/170379532): Add better unit tests to validate flatbuffer values.
  void* out_buffer = nullptr;
  if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
    // If we've found a buffer, does it have any data?
    if (auto* array = buffer->data()) {
      // If it has any data, is the data size larger than zero?
      if (array->size()) {
        // We've found a buffer with valid data, so update the runtime tensor
        // data structure to point to it.
        out_buffer = const_cast<void*>(static_cast<const void*>(array->data()));
      }
    }
    // TODO(petewarden): It's not clear in what circumstances we could have a
    // buffer in the serialized tensor, but it doesn't have any data in it. Is
    // that a validly-generated file, and if so what does it mean, or is it an
    // error condition? It would be good to tighten up the specification to
    // make it less ambiguous.
  }
  return out_buffer;
}

TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
    IPersistentBufferAllocator* persistent_buffer_allocator,
    INonPersistentBufferAllocator* non_persistent_buffer_allocator,
    bool allocate_temp, const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result) {
  TFLITE_DCHECK(result != nullptr);

  *result = {};
  // Make sure the serialized type is one we know how to deal with, and convert
  // it from a flatbuffer enum into a constant used by the kernel C API.
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &result->type, error_reporter));
  // Make sure we remember if the serialized tensor is designated as a
  // variable.
  result->is_variable = flatbuffer_tensor.is_variable();

  result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);

  // TODO(petewarden): Some of these paths aren't getting enough testing
  // coverage, so we should figure out some tests that exercise them.
  if (result->data.data == nullptr) {
    // The tensor contents haven't been set from a serialized buffer, so
    // make a note that they will be allocated from memory. The actual
    // allocation won't happen until later.
    result->allocation_type = kTfLiteArenaRw;
  } else {
    // We set the data from a serialized buffer, so record that.
    result->allocation_type = kTfLiteMmapRo;
  }

  // Figure out what the size in bytes of the buffer is and store it.
  size_t type_size;
  TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
      flatbuffer_tensor, &result->bytes, &type_size, error_reporter));

  if (flatbuffer_tensor.shape() == nullptr) {
    // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
    // tensor.
    // TODO(b/188459715): figure out why const_cast is required here.
    result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
  } else {
    // TFLM doesn't allow reshaping the tensor which requires dynamic memory
    // allocation, so it is safe to drop the const qualifier. In the future, if
    // we really want to update the tensor shape, we can always pass in a new
    // TfLiteIntArray - especially we have to do so if the dimension is
    // changed.
    result->dims = FlatBufferVectorToTfLiteTypeArray(flatbuffer_tensor.shape());
  }

  // Copy the quantization information from the serialized data.
  const auto* src_quantization = flatbuffer_tensor.quantization();
  if (src_quantization && src_quantization->scale() &&
      (src_quantization->scale()->size() > 0) &&
      src_quantization->zero_point() &&
      (src_quantization->zero_point()->size() > 0)) {
    // Always populate the TfLiteTensor.params field, even if there are
    // per-channel quantization parameters.
    result->params.scale = src_quantization->scale()->Get(0);
    // Note that the zero_point field in the FlatBuffers schema is a 64-bit
    // integer, but the zero_point field in the TfLiteQuantizationParams struct
    // is a 32-bit integer.
    result->params.zero_point =
        static_cast<int32_t>(src_quantization->zero_point()->Get(0));

    // Populate per-channel quantization params.
    int channels = src_quantization->scale()->size();
    TfLiteAffineQuantization* quantization =
        allocate_temp
            ? reinterpret_cast<TfLiteAffineQuantization*>(
                  non_persistent_buffer_allocator->AllocateTemp(
                      sizeof(TfLiteAffineQuantization),
                      alignof(TfLiteAffineQuantization)))
            : reinterpret_cast<TfLiteAffineQuantization*>(
                  persistent_buffer_allocator->AllocatePersistentBuffer(
                      sizeof(TfLiteAffineQuantization),
                      alignof(TfLiteAffineQuantization)));
    if (quantization == nullptr) {
      TF_LITE_REPORT_ERROR(error_reporter,
                           "Unable to allocate TfLiteAffineQuantization.\n");
      return kTfLiteError;
    }

    // TODO(b/153688719): Reduce tail allocation by using a global zero-point
    // buffer. This value can not be reused from the flatbuffer since the
    // zero_point is stored as a int64_t.
    quantization->zero_point =
        allocate_temp
            ? reinterpret_cast<TfLiteIntArray*>(
                  non_persistent_buffer_allocator->AllocateTemp(
                      TfLiteIntArrayGetSizeInBytes(channels),
                      alignof(TfLiteIntArray)))
            : reinterpret_cast<TfLiteIntArray*>(
                  persistent_buffer_allocator->AllocatePersistentBuffer(
                      TfLiteIntArrayGetSizeInBytes(channels),
                      alignof(TfLiteIntArray)));
    if (quantization->zero_point == nullptr) {
      TF_LITE_REPORT_ERROR(error_reporter,
                           "Unable to allocate quantization->zero_point.\n");
      return kTfLiteError;
    }

    quantization->scale =
        FlatBufferVectorToTfLiteTypeArray(src_quantization->scale());

    quantization->zero_point->size = channels;
    int* zero_point_data = quantization->zero_point->data;
    for (int i = 0; i < channels; i++) {
      // As a space-saving optimization, zero point arrays for weights can be
      // reduced to a single value, since all zero points for weights are 0.
      zero_point_data[i] = src_quantization->zero_point()->size() ==
                                   src_quantization->scale()->size()
                               ? src_quantization->zero_point()->Get(i)
                               : src_quantization->zero_point()->Get(0);
    }
    // TODO(rocky): Need to add a micro_allocator test case that fails when
    // this is not copied:
    quantization->quantized_dimension = src_quantization->quantized_dimension();

    result->quantization = {kTfLiteAffineQuantization, quantization};
  }
  return kTfLiteOk;
}

TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer(
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteEvalTensor* result) {
  *result = {};
  // Make sure the serialized type is one we know how to deal with, and convert
  // it from a flatbuffer enum into a constant used by the kernel C API.
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &result->type, error_reporter));

  result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);

  if (flatbuffer_tensor.shape() == nullptr) {
    // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
    // tensor.
    result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
  } else {
    result->dims = FlatBufferVectorToTfLiteTypeArray(flatbuffer_tensor.shape());
  }
  return kTfLiteOk;
}

}  // namespace internal

size_t MicroAllocator::GetDefaultTailUsage(bool is_memory_planner_given) {
  // TODO(b/208703041): a template version of AlignSizeUp to make expression
  // shorter.
  size_t total_size =
      AlignSizeUp(sizeof(SingleArenaBufferAllocator),
                  alignof(SingleArenaBufferAllocator)) +
      AlignSizeUp(sizeof(MicroAllocator), alignof(MicroAllocator)) +
      AlignSizeUp(sizeof(MicroBuiltinDataAllocator),
                  alignof(MicroBuiltinDataAllocator)) +
      AlignSizeUp(sizeof(SubgraphAllocations), alignof(SubgraphAllocations));
  if (!is_memory_planner_given) {
    total_size +=
        AlignSizeUp(sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
  }
  return total_size;
}

MicroAllocator::MicroAllocator(SingleArenaBufferAllocator* memory_allocator,
                               MicroMemoryPlanner* memory_planner,
                               ErrorReporter* error_reporter)
    : non_persistent_buffer_allocator_(memory_allocator),
      persistent_buffer_allocator_(memory_allocator),
      memory_planner_(memory_planner),
      error_reporter_(error_reporter),
      model_is_allocating_(false) {}

MicroAllocator::MicroAllocator(
    IPersistentBufferAllocator* persistent_buffer_allocator,
    INonPersistentBufferAllocator* non_persistent_buffer_allocator,
    MicroMemoryPlanner* memory_planner, ErrorReporter* error_reporter)
    : non_persistent_buffer_allocator_(non_persistent_buffer_allocator),
      persistent_buffer_allocator_(persistent_buffer_allocator),
      memory_planner_(memory_planner),
      error_reporter_(error_reporter),
      model_is_allocating_(false) {}

MicroAllocator::~MicroAllocator() {}

MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, size_t arena_size,
                                       MicroMemoryPlanner* memory_planner,
                                       ErrorReporter* error_reporter) {
  uint8_t* aligned_arena =
      AlignPointerUp(tensor_arena, MicroArenaBufferAlignment());
  size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
  SingleArenaBufferAllocator* memory_allocator =
      SingleArenaBufferAllocator::Create(error_reporter, aligned_arena,
                                         aligned_arena_size);

  return Create(memory_allocator, memory_planner, error_reporter);
}

MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, size_t arena_size,
                                       ErrorReporter* error_reporter) {
  uint8_t* aligned_arena =
      AlignPointerUp(tensor_arena, MicroArenaBufferAlignment());
  size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
  SingleArenaBufferAllocator* memory_allocator =
      SingleArenaBufferAllocator::Create(error_reporter, aligned_arena,
                                         aligned_arena_size);

  // By default, create a GreedyMemoryPlanner.
  // If a different MemoryPlanner is needed, use the other API.
  uint8_t* memory_planner_buffer = memory_allocator->AllocatePersistentBuffer(
      sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
  GreedyMemoryPlanner* memory_planner =
      new (memory_planner_buffer) GreedyMemoryPlanner();

  return Create(memory_allocator, memory_planner, error_reporter);
}

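// Illustrative usage sketch: in most applications the MicroInterpreter
// creates the allocator internally, but the single-arena factory above can
// also be used directly. The names `kArenaSize`, `arena`, and the arena size
// chosen below are assumptions for the example:
//
//   constexpr size_t kArenaSize = 10 * 1024;
//   alignas(16) static uint8_t arena[kArenaSize];
//   tflite::MicroErrorReporter error_reporter;
//   tflite::MicroAllocator* allocator =
//       tflite::MicroAllocator::Create(arena, kArenaSize, &error_reporter);
//
// All bookkeeping structures (including the MicroAllocator object itself and
// the default GreedyMemoryPlanner) live inside `arena`, so no heap is needed.
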
MicroAllocator* MicroAllocator::Create(
    SingleArenaBufferAllocator* memory_allocator,
    MicroMemoryPlanner* memory_planner, ErrorReporter* error_reporter) {
  TFLITE_DCHECK(memory_allocator != nullptr);
  TFLITE_DCHECK(error_reporter != nullptr);
  TFLITE_DCHECK(memory_planner != nullptr);

  uint8_t* allocator_buffer = memory_allocator->AllocatePersistentBuffer(
      sizeof(MicroAllocator), alignof(MicroAllocator));
  MicroAllocator* allocator = new (allocator_buffer) MicroAllocator(
      memory_allocator, memory_allocator, memory_planner, error_reporter);
  return allocator;
}

MicroAllocator* MicroAllocator::Create(uint8_t* persistent_tensor_arena,
                                       size_t persistent_arena_size,
                                       uint8_t* non_persistent_tensor_arena,
                                       size_t non_persistent_arena_size,
                                       ErrorReporter* error_reporter) {
  TFLITE_DCHECK(persistent_tensor_arena != nullptr);
  TFLITE_DCHECK(non_persistent_tensor_arena != nullptr);
  TFLITE_DCHECK(persistent_tensor_arena != non_persistent_tensor_arena);
  TFLITE_DCHECK(error_reporter != nullptr);

  IPersistentBufferAllocator* persistent_buffer_allocator =
      CreatePersistentArenaAllocator(persistent_tensor_arena,
                                     persistent_arena_size);
  INonPersistentBufferAllocator* non_persistent_buffer_allocator =
      CreateNonPersistentArenaAllocator(non_persistent_tensor_arena,
                                        non_persistent_arena_size,
                                        persistent_buffer_allocator);

  uint8_t* memory_planner_buffer =
      persistent_buffer_allocator->AllocatePersistentBuffer(
          sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
  GreedyMemoryPlanner* memory_planner =
      new (memory_planner_buffer) GreedyMemoryPlanner();

  uint8_t* micro_allocator_buffer =
      persistent_buffer_allocator->AllocatePersistentBuffer(
          sizeof(MicroAllocator), alignof(MicroAllocator));
  MicroAllocator* allocator = new (micro_allocator_buffer) MicroAllocator(
      persistent_buffer_allocator, non_persistent_buffer_allocator,
      memory_planner, error_reporter);
  return allocator;
}

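// Illustrative usage sketch: the overload above lets persistent data (tensor
// metadata, node structs, quantization params) live in a different memory
// region than the planned, overlay-style non-persistent data; for example,
// the two arenas could be placed in different SRAM banks. The buffer names
// and sizes below are assumptions for the example:
//
//   alignas(16) static uint8_t persistent_arena[4 * 1024];
//   alignas(16) static uint8_t non_persistent_arena[8 * 1024];
//   tflite::MicroErrorReporter error_reporter;
//   tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
//       persistent_arena, sizeof(persistent_arena), non_persistent_arena,
//       sizeof(non_persistent_arena), &error_reporter);
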
SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) {
  TFLITE_DCHECK(model != nullptr);

  if (model_is_allocating_) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "MicroAllocator: Model allocation started before "
                         "finishing previously allocated model");
    return nullptr;
  }

  model_is_allocating_ = true;

  uint8_t* data_allocator_buffer =
      persistent_buffer_allocator_->AllocatePersistentBuffer(
          sizeof(MicroBuiltinDataAllocator),
          alignof(MicroBuiltinDataAllocator));
  builtin_data_allocator_ = new (data_allocator_buffer)
      MicroBuiltinDataAllocator(persistent_buffer_allocator_);

  if (InitScratchBufferData() != kTfLiteOk) {
    return nullptr;
  }

  // Allocate struct to store eval tensors, nodes and registrations.
  SubgraphAllocations* output = reinterpret_cast<SubgraphAllocations*>(
      persistent_buffer_allocator_->AllocatePersistentBuffer(
          sizeof(SubgraphAllocations) * model->subgraphs()->size(),
          alignof(SubgraphAllocations)));
  if (output == nullptr) {
    MicroPrintf("Failed to allocate memory for model metadata.");
    return nullptr;
  }

  if (AllocateTfLiteEvalTensors(model, output) != kTfLiteOk ||
      AllocateNodeAndRegistrations(model, output) != kTfLiteOk) {
    return nullptr;
  }
  return output;
}

TfLiteStatus MicroAllocator::FinishModelAllocation(
    const Model* model, SubgraphAllocations* subgraph_allocations,
    ScratchBufferHandle** scratch_buffer_handles) {
  if (!model_is_allocating_) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "MicroAllocator: Model allocation finished before "
                         "starting to allocate the model");
    return kTfLiteError;
  }

  // Allocate scratch buffer metadata.
  TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
      scratch_buffer_handles, scratch_buffer_request_count_));

  // Allocate buffers for variable tensors.
  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
       subgraph_idx++) {
    const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
    TFLITE_DCHECK(subgraph != nullptr);
    TF_LITE_ENSURE_STATUS(AllocateVariables(
        subgraph, subgraph_allocations[subgraph_idx].tensors));
  }

  // Plan all subgraphs and scratch buffers together.
  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph_allocations,
                                               *scratch_buffer_handles));
  model_is_allocating_ = false;
  return kTfLiteOk;
}

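// Illustrative usage sketch: StartModelAllocation() and
// FinishModelAllocation() bracket the per-model allocation sequence that the
// MicroInterpreter normally drives. Between the two calls the graph is
// initialized and each kernel's Prepare() may request scratch buffers. A
// condensed sketch, with `model_data` assumed to point at a serialized model
// in memory and `allocator` created as shown earlier:
//
//   const tflite::Model* model = tflite::GetModel(model_data);
//   tflite::SubgraphAllocations* subgraph_allocations =
//       allocator->StartModelAllocation(model);
//   // ... init and prepare kernels here (normally done by MicroInterpreter
//   // and MicroGraph) ...
//   tflite::ScratchBufferHandle* scratch_buffer_handles = nullptr;
//   TfLiteStatus status = allocator->FinishModelAllocation(
//       model, subgraph_allocations, &scratch_buffer_handles);
//
// Only after FinishModelAllocation() returns kTfLiteOk are tensor buffers and
// scratch buffers bound to their final addresses in the arena.
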
void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) {
  return persistent_buffer_allocator_->AllocatePersistentBuffer(
      bytes, MicroArenaBufferAlignment());
}

TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
                                                         int subgraph_idx,
                                                         int* buffer_idx) {
  // All scratch buffer requests are stored in the head section of the arena
  // when a model is in the prepare phase. First align a scratch buffer request
  // pointer to the start of the head:
  internal::ScratchBufferRequest* requests = GetScratchBufferRequests();

  // Count the number of requested scratch buffers for the current node:
  size_t current_node_request_count = 0;
  for (size_t i = 0; i < scratch_buffer_request_count_; ++i) {
    if (requests[i].node_idx == kUnassignedScratchBufferRequestIndex) {
      ++current_node_request_count;
    }
  }

  // First, ensure that the per-kernel request has not exceeded the limit:
  if (current_node_request_count >= kMaxScratchBuffersPerOp) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Scratch buffer request exceeds limit per operator (%d)",
        kMaxScratchBuffersPerOp);
    return kTfLiteError;
  }

  // Initialize and assign values for the request at the current index:
  internal::ScratchBufferRequest* current_request =
      &requests[scratch_buffer_request_count_];
  *current_request = {};
  // Assign -1 as a sentinel value that will be updated when the node finishes
  // allocating:
  current_request->bytes = bytes;
  current_request->node_idx = kUnassignedScratchBufferRequestIndex;
  current_request->subgraph_idx = subgraph_idx;

  // Assign the current request index to the out-param:
  *buffer_idx = scratch_buffer_request_count_;

  // Bump the request count to prepare for the next request:
  ++scratch_buffer_request_count_;
  return kTfLiteOk;
}

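// Illustrative kernel-side sketch: operator kernels do not call
// RequestScratchBufferInArena() on the allocator directly; they go through
// the TfLiteContext callbacks, which the framework routes to this allocator.
// A typical pattern, with `kScratchSize` and `data->scratch_index` as
// assumptions for the example:
//
//   // In Prepare():
//   int scratch_index = -1;
//   TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
//       context, kScratchSize, &scratch_index));
//   data->scratch_index = scratch_index;
//
//   // In Invoke():
//   void* scratch = context->GetScratchBuffer(context, data->scratch_index);
//
// The returned index is only resolved to a concrete address after
// FinishModelAllocation() has committed the memory plan.
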
TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
  // When a node has finished preparing, all temp allocations performed by the
  // kernel should be cleaned up:
  TF_LITE_ENSURE_STATUS(ResetTempAllocations());

  // Find and update any new scratch buffer requests for the current node:
  internal::ScratchBufferRequest* requests = GetScratchBufferRequests();

  for (size_t i = 0; i < scratch_buffer_request_count_; ++i) {
    // A request with a node_idx of -1 is a sentinel value used to indicate
    // this was a new request for the current node. The allocator finally knows
    // the node index at this point. Assign the value and update the list of
    // new requests so the head section can be adjusted to allow for the next
    // kernel to allocate at most kMaxScratchBuffersPerOp requests:
    if (requests[i].node_idx == kUnassignedScratchBufferRequestIndex) {
      requests[i].node_idx = node_id;
    }
  }

  // Ensure that the head is re-adjusted to allow for another at-most
  // kMaxScratchBuffersPerOp scratch buffer requests in the next operator:
  TF_LITE_ENSURE_STATUS(non_persistent_buffer_allocator_->ResizeBuffer(
      scratch_buffer_head_,
      sizeof(internal::ScratchBufferRequest) *
          (scratch_buffer_request_count_ + kMaxScratchBuffersPerOp),
      alignof(internal::ScratchBufferRequest)));

  return kTfLiteOk;
}

size_t MicroAllocator::used_bytes() const {
  return non_persistent_buffer_allocator_->GetNonPersistentUsedBytes() +
         persistent_buffer_allocator_->GetPersistentUsedBytes();
}

TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
    const Model* model, SubgraphAllocations* subgraph_allocations) {
  TFLITE_DCHECK(subgraph_allocations != nullptr);

  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
       subgraph_idx++) {
    const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
    TFLITE_DCHECK(subgraph != nullptr);

    uint32_t operators_size = NumSubgraphOperators(subgraph);

    // Initialize NodeAndRegistrations for the subgraph.
    NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
        persistent_buffer_allocator_->AllocatePersistentBuffer(
            sizeof(NodeAndRegistration) * operators_size,
            alignof(NodeAndRegistration)));
    if (output == nullptr) {
      TF_LITE_REPORT_ERROR(
          error_reporter_,
          "Failed to allocate memory for node_and_registrations.");
      return kTfLiteError;
    }
    subgraph_allocations[subgraph_idx].node_and_registrations = output;
  }
  return kTfLiteOk;
}

TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
    const Model* model, const SubgraphAllocations* subgraph_allocations,
    int tensor_index, int subgraph_index) {
  const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index);
  TFLITE_DCHECK(subgraph != nullptr);

  // This value is allocated from persistent arena space. It is guaranteed to
  // be around for the lifetime of the application.
  TfLiteTensor* tensor = AllocatePersistentTfLiteTensorInternal();

  // Populate any fields from the flatbuffer. Since this TfLiteTensor struct is
  // allocated in the persistent section of the arena, ensure that additional
  // allocations also take place in that section of the arena.
  if (PopulateTfLiteTensorFromFlatbuffer(
          model, tensor, tensor_index, subgraph_index,
          /*allocate_temp=*/false) != kTfLiteOk) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Failed to populate a persistent TfLiteTensor struct "
                         "from flatbuffer data!");
    return nullptr;
  }

  if (subgraph_allocations != nullptr) {
    // Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
    // and not located in the flatbuffer are stored on the pre-allocated list
    // of TfLiteEvalTensor structs. These structs are the source of truth;
    // simply point the corresponding buffer to the new TfLiteTensor data
    // value.
    tensor->data.data =
        subgraph_allocations[subgraph_index].tensors[tensor_index].data.data;
    // TfLiteEvalTensor structs must also be the source of truth for the
    // TfLiteTensor dims.
    tensor->dims =
        subgraph_allocations[subgraph_index].tensors[tensor_index].dims;
  }
  return tensor;
}

void MicroAllocator::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
  TFLITE_DCHECK(tensor != nullptr);

  if (tensor->quantization.type == kTfLiteAffineQuantization) {
    TFLITE_DCHECK(tensor->quantization.params != nullptr);
    TfLiteAffineQuantization* quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            tensor->quantization.params);

    non_persistent_buffer_allocator_->DeallocateTemp(
        reinterpret_cast<uint8_t*>(quantization->zero_point));
    non_persistent_buffer_allocator_->DeallocateTemp(
        reinterpret_cast<uint8_t*>(quantization));
  }

  // Clear the data in case someone still accesses the tensor arena by mistake.
  tensor->quantization.type = kTfLiteNoQuantization;
  tensor->quantization.params = nullptr;
  tensor->data.data = nullptr;
  tensor->dims = nullptr;
  non_persistent_buffer_allocator_->DeallocateTemp(
      reinterpret_cast<uint8_t*>(tensor));
}

TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
    const Model* model, const SubgraphAllocations* subgraph_allocations,
    int tensor_index, int subgraph_index) {
  const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index);
  TFLITE_DCHECK(subgraph != nullptr);

  // This value is allocated from temporary arena space. It is guaranteed to be
  // around for at least the scope of the calling function. Since this struct
  // allocation takes place in temp space, there is no need to own or clean it
  // up.
  TfLiteTensor* tensor = reinterpret_cast<TfLiteTensor*>(
      non_persistent_buffer_allocator_->AllocateTemp(sizeof(TfLiteTensor),
                                                     alignof(TfLiteTensor)));

  // Populate any fields from the flatbuffer. Since this TfLiteTensor struct is
  // allocated in the temp section of the arena, ensure that additional
  // allocations also take place in that section of the arena.
  if (PopulateTfLiteTensorFromFlatbuffer(model, tensor, tensor_index,
                                         subgraph_index,
                                         /*allocate_temp=*/true) != kTfLiteOk) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Failed to populate a temp TfLiteTensor struct from flatbuffer data!");
    return nullptr;
  }

  if (subgraph_allocations != nullptr) {
    // Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
    // and not located in the flatbuffer are stored on the pre-allocated list
    // of TfLiteEvalTensor structs. These structs are the source of truth;
    // simply point the corresponding buffer to the new TfLiteTensor data
    // value.
    tensor->data.data =
        subgraph_allocations[subgraph_index].tensors[tensor_index].data.data;
    // TfLiteEvalTensor structs must also be the source of truth for the
    // TfLiteTensor dims.
    tensor->dims =
        subgraph_allocations[subgraph_index].tensors[tensor_index].dims;
  }
  return tensor;
}

TfLiteStatus MicroAllocator::ResetTempAllocations() {
  return non_persistent_buffer_allocator_->ResetTempAllocations();
}

bool MicroAllocator::IsAllTempDeallocated() {
  return non_persistent_buffer_allocator_->IsAllTempDeallocated();
}

TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
    const Model* model, SubgraphAllocations* subgraph_allocations) {
  TFLITE_DCHECK(subgraph_allocations != nullptr);

  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
       subgraph_idx++) {
    const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
    TFLITE_DCHECK(subgraph != nullptr);

    size_t alloc_count = subgraph->tensors()->size();
    TfLiteEvalTensor* tensors = reinterpret_cast<TfLiteEvalTensor*>(
        persistent_buffer_allocator_->AllocatePersistentBuffer(
            sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
    if (tensors == nullptr) {
      TF_LITE_REPORT_ERROR(
          error_reporter_,
          "Failed to allocate memory for context->eval_tensors, "
          "%d bytes required",
          sizeof(TfLiteEvalTensor) * alloc_count);
      return kTfLiteError;
    }

    for (size_t i = 0; i < alloc_count; ++i) {
      TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
          *subgraph->tensors()->Get(i), model->buffers(), error_reporter_,
          &tensors[i]);
      if (status != kTfLiteOk) {
        TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
                             i);
        return kTfLiteError;
      }
    }
    subgraph_allocations[subgraph_idx].tensors = tensors;
  }
  return kTfLiteOk;
}

TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
                                               TfLiteEvalTensor* eval_tensors) {
  for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
    auto* tensor = subgraph->tensors()->Get(i);
    if (tensor->is_variable()) {
      size_t buffer_size;
      TF_LITE_ENSURE_STATUS(
          TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));

      eval_tensors[i].data.data =
          persistent_buffer_allocator_->AllocatePersistentBuffer(
              buffer_size, MicroArenaBufferAlignment());

      if (eval_tensors[i].data.data == nullptr) {
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Failed to allocate variable tensor of size %d",
                             buffer_size);
        return kTfLiteError;
      }
    }
  }
  return kTfLiteOk;
}

TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal() {
  return reinterpret_cast<TfLiteTensor*>(
      persistent_buffer_allocator_->AllocatePersistentBuffer(
          sizeof(TfLiteTensor), alignof(TfLiteTensor)));
}

TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
    const Model* model, TfLiteTensor* tensor, int tensor_index,
    int subgraph_idx, bool allocate_temp) {
  // TODO(b/162311891): This method serves as a stub to ensure quantized
  // allocations in the tail can be recorded. Once the interpreter has APIs for
  // accessing buffers on TfLiteEvalTensor this method can be dropped.
  return internal::InitializeTfLiteTensorFromFlatbuffer(
      persistent_buffer_allocator_, non_persistent_buffer_allocator_,
      allocate_temp,
      *model->subgraphs()->Get(subgraph_idx)->tensors()->Get(tensor_index),
      model->buffers(), error_reporter_, tensor);
}

ErrorReporter* MicroAllocator::error_reporter() const {
  return error_reporter_;
}

TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
    const Model* model, SubgraphAllocations* allocations,
    ScratchBufferHandle* scratch_buffer_handles) {
  size_t head_usage = 0;
  // Create the static memory plan:
  // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
  // 2. Add them into the planner (such as the GreedyMemoryPlanner).
  // 3. Perform static memory planning using the planner.
  // 4. Set tensor/buffer pointers based on the offsets from the previous step.
  //
  // Note that AllocationInfo is only needed for creating the plan. It will be
  // allocated from the temp section and cleaned up at the bottom of this
  // function.

  // Use the AllocationInfoBuilder class to help determine where buffers are
  // used in the subgraph.
  AllocationInfoBuilder builder(model, non_persistent_buffer_allocator_,
                                error_reporter_);
  TF_LITE_ENSURE_STATUS(
      builder.CreateAllocationInfo(scratch_buffer_request_count_));

  const int32_t* offline_planner_offsets = nullptr;
  TF_LITE_ENSURE_STATUS(
      builder.GetOfflinePlannedOffsets(&offline_planner_offsets));
  TF_LITE_ENSURE_STATUS(
      builder.InitializeAllocationInfo(offline_planner_offsets, allocations));

  internal::ScratchBufferRequest* scratch_buffer_requests =
      GetScratchBufferRequests();
  TF_LITE_ENSURE_STATUS(builder.MarkAllocationLifetimes(
      0, scratch_buffer_requests, scratch_buffer_handles, allocations));
  int allocation_info_count = builder.AllocationCount();
  AllocationInfo* allocation_info = builder.Finish();

  // Remaining arena size that the memory planner can use for calculating
  // offsets.
  size_t remaining_arena_size =
      non_persistent_buffer_allocator_->GetAvailableMemory(
          MicroArenaBufferAlignment());
  uint8_t* planner_arena = non_persistent_buffer_allocator_->AllocateTemp(
      remaining_arena_size, MicroArenaBufferAlignment());
  TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr);
  memory_planner_->Init(planner_arena, remaining_arena_size);
  TF_LITE_ENSURE_STATUS(CreatePlan(error_reporter_, memory_planner_,
                                   allocation_info, allocation_info_count));

  // Commit the plan.
  TF_LITE_ENSURE_STATUS(
      CommitPlan(error_reporter_, memory_planner_,
                 non_persistent_buffer_allocator_->GetOverlayMemoryAddress(),
                 allocation_info, allocation_info_count));

  // Reset all temp allocations used above:
  builder.FreeAllocationInfo();
  non_persistent_buffer_allocator_->DeallocateTemp(planner_arena);
  TF_LITE_ENSURE_STATUS(
      non_persistent_buffer_allocator_->ResetTempAllocations());
  TF_LITE_ENSURE_STATUS(
      non_persistent_buffer_allocator_->DeallocateResizableBuffer(
          scratch_buffer_head_));

#ifdef TF_LITE_SHOW_MEMORY_USE
  memory_planner_->PrintMemoryPlan();
#endif
  head_usage = memory_planner_->GetMaximumMemorySize();

  // The head is used to store memory plans for one model at a time during the
  // model preparation stage, and is re-purposed to store scratch buffer
  // handles during model invocation. The head must be as large as the greater
  // of the largest model memory plan's size and the total space required for
  // all scratch buffer handles.
  if (max_head_buffer_usage_ < head_usage) {
    max_head_buffer_usage_ = head_usage;
  }

  // The head is used for storing scratch buffer allocations before finalizing
  // a memory plan in this function. Ensure that the head is set to the largest
  // memory plan sent through the allocator:
  TF_LITE_ENSURE_STATUS(
      non_persistent_buffer_allocator_->ReserveNonPersistentOverlayMemory(
          max_head_buffer_usage_, MicroArenaBufferAlignment()));
  return kTfLiteOk;
}

TfLiteStatus MicroAllocator::AllocateScratchBufferHandles(
    ScratchBufferHandle** scratch_buffer_handles, size_t handle_count) {
  TFLITE_DCHECK(scratch_buffer_handles != nullptr);

  if (scratch_buffer_request_count_ == 0) {
    // No scratch buffers were requested during model allocation.
    return kTfLiteOk;
  }

  // Allocate a consecutive block of memory to store the scratch buffer
  // handles. This alignment ensures quick lookup during inference time for the
  // model:
  *scratch_buffer_handles = reinterpret_cast<ScratchBufferHandle*>(
      persistent_buffer_allocator_->AllocatePersistentBuffer(
          sizeof(ScratchBufferHandle) * handle_count,
          alignof(ScratchBufferHandle)));

  return kTfLiteOk;
}

TfLiteStatus MicroAllocator::InitScratchBufferData() {
  // A model is preparing to allocate resources; ensure that the scratch buffer
  // request counter is cleared:
  scratch_buffer_request_count_ = 0;

  // All requests will be stored in the head section. Each kernel is allowed at
  // most kMaxScratchBuffersPerOp requests. Adjust the head to reserve at most
  // that many requests to begin:
  scratch_buffer_head_ =
      non_persistent_buffer_allocator_->AllocateResizableBuffer(
          sizeof(internal::ScratchBufferRequest) * kMaxScratchBuffersPerOp,
          alignof(internal::ScratchBufferRequest));
  if (scratch_buffer_head_ == nullptr) {
    return kTfLiteError;
  }

  return kTfLiteOk;
}

internal::ScratchBufferRequest* MicroAllocator::GetScratchBufferRequests() {
  return reinterpret_cast<internal::ScratchBufferRequest*>(AlignPointerUp(
      scratch_buffer_head_, alignof(internal::ScratchBufferRequest)));
}

BuiltinDataAllocator* MicroAllocator::GetBuiltinDataAllocator() {
  return builtin_data_allocator_;
}

}  // namespace tflite