@@ -0,0 +1,774 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <utility>
+
+#include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
+
+#if defined(_MSC_VER)
+#define __restrict__ __restrict
+#endif
+
+namespace tflite {
+namespace tensor_utils {
+
+namespace {
+const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
+const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
+}  // namespace
+
+void PortableSymmetricQuantizeFloats(const float* values, const int size,
+                                     int8_t* quantized_values, float* min_value,
+                                     float* max_value, float* scaling_factor) {
+  auto minmax = std::minmax_element(values, values + size);
+  *min_value = *minmax.first;
+  *max_value = *minmax.second;
+
+  PortableSymmetricQuantizeFloats(values, size, quantized_values, *min_value,
+                                  *max_value, scaling_factor);
+}
+
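+// Symmetric quantization maps floats to int8 with a zero point of 0:
+// scaling_factor = max(|min_value|, |max_value|) / 127, and each value is
+// quantized as round(value / scaling_factor), clamped to [-127, 127].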
+void PortableSymmetricQuantizeFloats(const float* values, const int size,
+                                     int8_t* quantized_values, float min_value,
+                                     float max_value, float* scaling_factor) {
+  const int32_t kScale = 127;
+  const float range = std::max(std::abs(min_value), std::abs(max_value));
+  if (range == 0) {
+    memset(quantized_values, 0, size * sizeof(int8_t));
+    *scaling_factor = 1;
+    return;
+  }
+  *scaling_factor = range / kScale;
+  const float scaling_factor_inv = kScale / range;
+  for (int i = 0; i < size; ++i) {
+    const int32_t quantized_value =
+        static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
+    // Clamp, in case rounding lands just outside [-127, 127].
+    quantized_values[i] = static_cast<int8_t>(
+        std::min(kScale, std::max(-kScale, quantized_value)));
+  }
+}
+
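+// Asymmetric quantization: the value range (widened to include 0) is mapped
+// onto [-128, 127]. The zero point is taken from whichever end of the range
+// produces the smaller error term, nudged into [-128, 127], and each value is
+// quantized as round(offset + value / scaling_factor), clamped to int8.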
+void PortableAsymmetricQuantizeFloats(const float* values, const int size,
+                                      int8_t* quantized_values,
+                                      float* scaling_factor, int32_t* offset) {
+  const int32_t kMinScale = -128;
+  const int32_t kMaxScale = 127;
+  const double qmin_double = kMinScale;
+  const double qmax_double = kMaxScale;
+  const auto minmax = std::minmax_element(values, values + size);
+  const double rmin = std::fmin(0, *minmax.first);
+  const double rmax = std::fmax(0, *minmax.second);
+  if (rmin == rmax) {
+    memset(quantized_values, 0, size * sizeof(int8_t));
+    *scaling_factor = 1;
+    *offset = 0;
+    return;
+  } else {
+    double scale = (rmax - rmin) / (qmax_double - qmin_double);
+    const double zero_point_from_min = qmin_double - rmin / scale;
+    const double zero_point_from_max = qmax_double - rmax / scale;
+    const double zero_point_from_min_error =
+        std::abs(qmin_double) + std::abs(rmin / scale);
+    const double zero_point_from_max_error =
+        std::abs(qmax_double) + std::abs(rmax / scale);
+    const double zero_point_double =
+        zero_point_from_min_error < zero_point_from_max_error
+            ? zero_point_from_min
+            : zero_point_from_max;
+    int8_t nudged_zero_point = 0;
+    if (zero_point_double <= qmin_double) {
+      nudged_zero_point = kMinScale;
+    } else if (zero_point_double >= qmax_double) {
+      nudged_zero_point = kMaxScale;
+    } else {
+      nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
+    }
+    *scaling_factor = scale;
+    *offset = nudged_zero_point;
+  }
+  const float scaling_factor_inv = 1.0f / *scaling_factor;
+  for (int i = 0; i < size; ++i) {
+    const int32_t quantized_value = static_cast<int32_t>(
+        TfLiteRound(*offset + values[i] * scaling_factor_inv));
+    quantized_values[i] =
+        std::min(kMaxScale, std::max(kMinScale, quantized_value));
+  }
+}
+
+void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
+                                                 int m_rows, int m_cols,
+                                                 const float* vector,
+                                                 int n_batch, float* result) {
+  float* result_in_batch = result;
+  for (int b = 0; b < n_batch; b++) {
+    const float* matrix_ptr = matrix;
+    for (int r = 0; r < m_rows; r++) {
+      float dot_prod = 0.0f;
+      const float* vector_in_batch = vector + b * m_cols;
+      for (int c = 0; c < m_cols; c++) {
+        dot_prod += *matrix_ptr++ * *vector_in_batch++;
+      }
+      *result_in_batch += dot_prod;
+      ++result_in_batch;
+    }
+  }
+}
+
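+// Hybrid matrix * batch-vector product: both the matrix and the vectors are
+// int8, the dot product is accumulated in int32, and the result is rescaled
+// to float with the per-batch scaling factor before being added into result.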
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float* scaling_factors,
+    int n_batch, float* __restrict__ result) {
+  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
+    const float batch_scaling_factor = scaling_factors[batch];
+    // Get the address of the first row.
+    const int8_t* row_ptr = matrix;
+    for (int row = 0; row < m_rows; ++row) {
+      // Initialize the dot product sum for the row to 0.
+      int32_t dotprod = 0;
+#if defined(__GNUC__)
+      // Prefetch the row to cache.
+      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
+                         3 /* temporal locality */);
+#endif
+      for (int col = 0; col < m_cols; ++col, ++row_ptr) {
+        dotprod += (*row_ptr) * (vectors[col]);
+      }  // for col
+      *result += dotprod * batch_scaling_factor;
+      ++result;
+    }  // for row
+  }  // for batch
+}
+
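+// Variant with asymmetric (zero-point) inputs and optional per-channel scales.
+// Instead of subtracting the input offset inside the inner loop, it uses the
+// identity sum((v - offset) * w) = sum(v * w) - offset * sum(w): the per-row
+// weight sums are cached in row_sums and refreshed when compute_row_sums is
+// null or points to true.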
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float* scaling_factors,
+    int n_batch, float* __restrict__ result, const float* per_channel_scale,
+    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
+    bool* compute_row_sums, CpuBackendContext* context) {
+  if (input_offset == nullptr) {
+    PortableMatrixBatchVectorMultiplyAccumulate(
+        matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result);
+    return;
+  }
+  if (!compute_row_sums || *compute_row_sums) {
+    PortableReductionSumVector(matrix, row_sums, m_rows, m_cols);
+    if (compute_row_sums) {
+      *compute_row_sums = false;
+    }
+  }
+
+  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
+    const float batch_scaling_factor = scaling_factors[batch];
+    const int32_t batch_offset = input_offset[batch];
+    const int8_t* row_ptr = matrix;
+    for (int row = 0; row < m_rows; ++row) {
+      int32_t dotprod = 0;
+      float scale = batch_scaling_factor;
+      if (per_channel_scale) {
+        scale *= per_channel_scale[row];
+      }
+#if defined(__GNUC__)
+      // Prefetch the row to cache.
+      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
+                         3 /* temporal locality */);
+#endif
+      for (int col = 0; col < m_cols; ++col, ++row_ptr) {
+        dotprod += (*row_ptr) * vectors[col];
+      }  // for col
+      dotprod -= row_sums[row] * batch_offset;
+      *result += dotprod * scale;
+      ++result;
+    }  // for row
+  }  // for batch
+}
+
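+// Block-sparse (1x4) matrix * batch-vector product in CSR-like form: entries
+// segments[row]..segments[row + 1] index into `indices`, each of which names
+// a nonzero 4-wide block column; only the nonzero blocks are stored
+// contiguously in `matrix`.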
+void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
+    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
+    const int32_t* __restrict__ indices, int m_rows, int m_cols,
+    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
+  const int kBlockSize = 4;
+  TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
+  for (int batch = 0; batch < n_batch; batch++) {
+    const float* matrix_ptr = matrix;
+    for (int row = 0; row < m_rows; row++) {
+      float dot_prod = 0.0f;
+      const float* vector_in_batch = vector + batch * m_cols;
+      for (int i = segments[row]; i < segments[row + 1]; i++) {
+        const int block_start_index = indices[i] * kBlockSize;
+        const float* vector_block_in_batch_ptr =
+            vector_in_batch + block_start_index;
+        for (int c = 0; c < kBlockSize; c++) {
+          dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
+        }
+      }
+      result[batch * m_rows + row] += dot_prod;
+    }
+  }
+}
+
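+// Ledger-encoded sparse matrix * batch-vector product: for each row the
+// ledger stores the number of nonzero 16-wide blocks followed by the block
+// indices, and `matrix` stores only the nonzero blocks back to back.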
+void PortableSparseMatrixBatchVectorMultiplyAccumulate(
+    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
+    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
+    float* __restrict__ result) {
+  const int kBlockSize = 16;
+  TFLITE_DCHECK_EQ(  // NOLINT
+      m_cols % kBlockSize, 0);
+  for (int batch = 0; batch < n_batch; batch++) {
+    const float* matrix_ptr = matrix;
+    const uint8_t* ledger_ptr = ledger;
+    for (int row = 0; row < m_rows; row++) {
+      float dot_prod = 0.0f;
+      int num_nonzero_blocks = *ledger_ptr++;
+      if (num_nonzero_blocks > 0) {
+        const float* vector_in_batch = vector + batch * m_cols;
+        for (int i = 0; i < num_nonzero_blocks; i++) {
+          const int block_start_index = *ledger_ptr++ * kBlockSize;
+          const float* vector_block_in_batch_ptr =
+              vector_in_batch + block_start_index;
+          for (int c = 0; c < kBlockSize; c++) {
+            dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
+          }
+        }
+      }
+      result[batch * m_rows + row] += dot_prod;
+    }
+  }
+}
+
+void PortableSparseMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
+    const int m_cols, const int8_t* __restrict__ vectors,
+    const float* scaling_factors, int n_batch, float* __restrict__ result) {
+  static const int kBlockSize = 16;
+  TFLITE_DCHECK_EQ(  // NOLINT
+      m_cols % kBlockSize, 0);
+  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
+    const float batch_scaling_factor = scaling_factors[batch];
+    const uint8_t* ledger_ptr = ledger;
+    // Get the address of the first row.
+    const int8_t* row_ptr = matrix;
+    for (int row = 0; row < m_rows; ++row) {
+      // Initialize the dot product sum for the row to 0.
+      int32_t dotprod = 0;
+#if defined(__GNUC__)
+      // Prefetch the row to cache.
+      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
+                         3 /* temporal locality */);
+#endif
+      int num_nonzero_blocks = *ledger_ptr++;
+      for (int i = 0; i < num_nonzero_blocks; i++) {
+        const int block_start_index = *ledger_ptr++ * kBlockSize;
+        const int8_t* vector_block_ptr = vectors + block_start_index;
+        for (int c = 0; c < kBlockSize; c++) {
+          dotprod += (*row_ptr++) * (*vector_block_ptr++);
+        }  // for block
+      }  // for num_nonzero_blocks
+      result[batch * m_rows + row] += dotprod * batch_scaling_factor;
+    }  // for row
+  }  // for batch
+}
+
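+// Fully integer matrix * batch-vector product shared by the int16 and int8
+// output variants below: int8 inputs and weights are accumulated into int32
+// together with a bias, requantized with (multiplier, shift), offset by
+// output_zp, accumulated into the existing output and saturated to the
+// output type.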
+template <typename T>
+void PortableMatrixBatchVectorMultiplyAccumulateImpl(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    T* output) {
+  const int16_t output_max = std::numeric_limits<T>::max();
+  const int16_t output_min = std::numeric_limits<T>::min();
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int row = 0; row < n_output; ++row) {
+      int32_t acc = bias[row];
+      for (int col = 0; col < n_input; ++col) {
+        int8_t input_val = input[batch * n_input + col];
+        int8_t weights_val = input_to_gate_weights[row * n_input + col];
+        acc += input_val * weights_val;
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
+      acc += output_zp;
+      acc += output[batch * n_output + row];
+      if (acc > output_max) {
+        acc = output_max;
+      }
+      if (acc < output_min) {
+        acc = output_min;
+      }
+      output[batch * n_output + row] = static_cast<T>(acc);
+    }
+  }
+}
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
+  PortableMatrixBatchVectorMultiplyAccumulateImpl(
+      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
+      n_output, output_zp, output);
+}
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
+  PortableMatrixBatchVectorMultiplyAccumulateImpl(
+      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
+      n_output, output_zp, output);
+}
+
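+// Computes an int8 gate output from an int8 input with a zero point: the
+// input offset is removed inside the accumulation, and the int32 accumulator
+// is requantized with the effective scale (a, b), offset by gate_output_zp
+// and saturated to int8.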
+void PortableMatrixBatchVectorMultiply(const int8_t* input,
+                                       int32_t input_zeropoint,
+                                       const int8_t* input_to_gate_weights,
+                                       int32_t input_to_gate_effective_scale_a,
+                                       int32_t input_to_gate_effective_scale_b,
+                                       int32_t n_batch, int32_t n_input,
+                                       int32_t n_cell, int8_t* gate_output,
+                                       int8_t gate_output_zp) {
+  const int32_t int8_max = std::numeric_limits<int8_t>::max();
+  const int32_t int8_min = std::numeric_limits<int8_t>::min();
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int row = 0; row < n_cell; ++row) {
+      int32_t acc = 0;
+      for (int col = 0; col < n_input; ++col) {
+        int32_t input_val = input[batch * n_input + col];
+        int8_t weights_val = input_to_gate_weights[row * n_input + col];
+        acc += (input_val - input_zeropoint) * weights_val;
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, input_to_gate_effective_scale_a,
+                                          input_to_gate_effective_scale_b);
+      acc += gate_output_zp;
+      if (acc > int8_max) {
+        acc = int8_max;
+      }
+      if (acc < int8_min) {
+        acc = int8_min;
+      }
+      gate_output[batch * n_cell + row] = static_cast<int8_t>(acc);
+    }
+  }
+}
+
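+// Projection matmul with an int16 activation and int8 weights. The
+// accumulation is done in int64 with overflow guards that clamp to the int32
+// limits; the result is then requantized with the effective projection scale,
+// offset by output_zp and saturated to int8.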
+void PortableMatrixBatchVectorMultiply(
+    const int16_t* hidden, const int8_t* hidden_to_output_weights,
+    int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
+    const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
+    int32_t n_output, int32_t output_zp, int8_t* proj_output) {
+  const int16_t int8_max = std::numeric_limits<int8_t>::max();
+  const int16_t int8_min = std::numeric_limits<int8_t>::min();
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int row = 0; row < n_output; ++row) {
+      int64_t acc = gate_bias[row];
+      for (int col = 0; col < n_hidden; ++col) {
+        int16_t input_val = hidden[batch * n_hidden + col];
+        int8_t weights_val = hidden_to_output_weights[row * n_hidden + col];
+        int64_t curr = acc;
+        acc += input_val * weights_val;
+        if (input_val * weights_val > 0 && acc < curr) {
+          acc = std::numeric_limits<int32_t>::max();
+        }
+        if (input_val * weights_val < 0 && acc > curr) {
+          acc = std::numeric_limits<int32_t>::min();
+        }
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a,
+                                          proj_effective_scale_b);
+      acc += output_zp;
+      if (acc > int8_max) {
+        acc = int8_max;
+      }
+      if (acc < int8_min) {
+        acc = int8_min;
+      }
+      proj_output[batch * n_output + row] = acc;
+    }
+  }
+}
+
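+// Integer layer normalization. Sums and squared sums are accumulated in
+// int64; the mean carries an extra 2^10 factor (and the variance 2^20) so the
+// normalized values keep enough resolution, the inverse standard deviation is
+// obtained as a quantized multiplier via GetInvSqrtQuantizedMultiplierExp,
+// and the weighted, biased result is requantized to int16 with
+// layer_norm_scale_(a, b).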
+void PortableApplyLayerNorm(const int16_t* input,
+                            const int16_t* layer_norm_weights,
+                            const int32_t* bias, int32_t layer_norm_scale_a,
+                            int32_t layer_norm_scale_b, int32_t variance_limit,
+                            int n_batch, int n_input, int16_t* output) {
+  // The square of std::pow(2, 10), which is the extra factor that makes sure
+  // normalized values have enough resolution.
+  static const int kTwoToPower20 = 1 << 20;
+  for (int i = 0; i < n_batch; ++i) {
+    int64_t sum = 0;
+    int64_t sum_sq = 0;
+    for (int j = 0; j < n_input; ++j) {
+      const int32_t index = i * n_input + j;
+      int32_t val = static_cast<int32_t>(input[index]);
+      sum += val;
+      sum_sq += val * val;
+    }
+    int32_t mean =
+        static_cast<int32_t>(static_cast<int64_t>(sum) * 1024 / n_input);
+    // TODO(b/173994730): Avoids overflow but only works for POT n_input.
+    int32_t temp = kTwoToPower20 / n_input;
+    int64_t variance =
+        sum_sq * temp - static_cast<int64_t>(mean) * static_cast<int64_t>(mean);
+    int32_t variance2 = static_cast<int32_t>(variance / kTwoToPower20);
+    if (variance2 < 1) {
+      variance2 = variance_limit;
+    }
+    int32_t stddev_inverse_a;
+    int stddev_inverse_b;
+    GetInvSqrtQuantizedMultiplierExp(variance2, /*reverse_shift*/ -1,
+                                     &stddev_inverse_a, &stddev_inverse_b);
+
+    for (int j = 0; j < n_input; ++j) {
+      const int32_t index = i * n_input + j;
+      int32_t val = static_cast<int32_t>(input[index]);
+      int32_t shifted = 1024 * val - mean;
+      int32_t rescaled = MultiplyByQuantizedMultiplier(
+          shifted, stddev_inverse_a, stddev_inverse_b);
+      // TODO(jianlijianli): Saturate this.
+      int64_t val3 = rescaled * layer_norm_weights[j] + bias[j];
+      int32_t val4 =
+          static_cast<int32_t>((val3 > 0 ? val3 + 512 : val3 - 512) / 1024);
+      int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a,
+                                                   layer_norm_scale_b + 12);
+      val5 = std::min(std::max(kInt16Min, val5), kInt16Max);
+      output[index] = static_cast<int16_t>(val5);
+    }
+  }
+}
+
+void PortableApplyLayerNormFloat(const int16_t* input,
+                                 const int16_t* layer_norm_weights,
+                                 int32_t layer_norm_scale_a,
+                                 int32_t layer_norm_scale_b,
+                                 const int32_t* bias, int n_batch, int n_input,
+                                 int16_t* output) {
+  const int32_t int16_max = std::numeric_limits<int16_t>::max();
+  const int32_t int16_min = std::numeric_limits<int16_t>::min();
+  const float layer_norm_scale =
+      layer_norm_scale_a *
+      std::pow(2.0, static_cast<double>(layer_norm_scale_b - 31));
+  const float bias_scale =
+      static_cast<float>(std::pow(2.0, -10)) * layer_norm_scale;
+
+  for (int batch = 0; batch < n_batch; ++batch) {
+    float sum = 0.0f;
+    float sum_sq = 0.0f;
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const float value = static_cast<float>(input[index]);
+      sum += value;
+      sum_sq += value * value;
+    }
+    const float mean = sum / n_input;
+    float stddev_inv = 0.0f;
+    const float variance = sum_sq / n_input - mean * mean;
+    if (variance == 0) {
+      stddev_inv = 1.0f / std::sqrt(1e-8f);
+    } else {
+      stddev_inv = 1.0f / std::sqrt(variance);
+    }
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const float normalized_value =
+          (static_cast<float>(input[index]) - mean) * stddev_inv;
+      const float weighted_normalized_value =
+          normalized_value * layer_norm_weights[i] * layer_norm_scale +
+          bias[i] * bias_scale;
+      const int32_t quant_output = static_cast<int32_t>(std::round(
+          weighted_normalized_value * static_cast<float>(std::pow(2, 12))));
+      output[index] = std::min(int16_max, std::max(int16_min, quant_output));
+    }
+  }
+}
+
+void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
+                                            int32_t scalar, int32_t n_row,
+                                            int32_t n_col, int32_t* output) {
+  for (int i = 0; i < n_row; ++i) {
+    int32_t row_sum = 0;
+    for (int j = 0; j < n_col; ++j) {
+      row_sum += *matrix++;
+    }
+    output[i] += row_sum * scalar;
+  }
+}
+
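+// Sigmoid on Q3.12 inputs producing Q0.15 outputs, using gemmlowp's
+// fixed-point logistic. The *Float variant below computes the same activation
+// in float (input scaled by 2^-12, output by 2^15) and serves as a reference.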
+void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
+                          int32_t n_input, int16_t* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int c = 0; c < n_input; c++) {
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+      const int index = batch * n_input + c;
+      F3 sigmoid_input = F3::FromRaw(input[index]);
+      F0 sigmoid_output = gemmlowp::logistic(sigmoid_input);
+      output[index] = sigmoid_output.raw();
+    }
+  }
+}
+
+void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
+                               int32_t n_input, int16_t* output) {
+  const int32_t int16_max = std::numeric_limits<int16_t>::max();
+  const int32_t int16_min = std::numeric_limits<int16_t>::min();
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const float float_input =
+          input[index] * static_cast<float>(std::pow(2, -12));
+      const float float_output = 1.0f / (1.0f + std::exp(-float_input));
+      const int32_t quant_output = static_cast<int32_t>(
+          float_output * static_cast<float>(std::pow(2, 15)));
+      const int32_t quant_output_clamped =
+          std::min(int16_max, std::max(int16_min, quant_output));
+      output[index] = static_cast<int16_t>(quant_output_clamped);
+    }
+  }
+}
+
+template <int IntegerBits>
+void PortableApplyTanhImpl(const int16_t* input, int32_t n_batch,
+                           int32_t n_input, int16_t* output) {
+  using FX = gemmlowp::FixedPoint<std::int16_t, IntegerBits>;
+  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      FX tanh_input = FX::FromRaw(input[index]);
+      F0 tanh_output = gemmlowp::tanh(tanh_input);
+      output[index] = tanh_output.raw();
+    }
+  }
+}
+
+void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
+                       int32_t n_batch, int32_t n_input, int16_t* output) {
+  assert(integer_bits <= 6);
+#define DISPATCH_TANH(i)                                       \
+  case i:                                                      \
+    PortableApplyTanhImpl<i>(input, n_batch, n_input, output); \
+    break;
+  switch (integer_bits) {
+    DISPATCH_TANH(0);
+    DISPATCH_TANH(1);
+    DISPATCH_TANH(2);
+    DISPATCH_TANH(3);
+    DISPATCH_TANH(4);
+    DISPATCH_TANH(5);
+    DISPATCH_TANH(6);
+    default:
+      return;
+  }
+#undef DISPATCH_TANH
+}
+
+void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
+                            int32_t n_input, int32_t integer_bits,
+                            int16_t* output) {
+  const int32_t int16_max = std::numeric_limits<int16_t>::max();
+  const int32_t int16_min = std::numeric_limits<int16_t>::min();
+  const double two = 2.0;
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const float float_input =
+          input[index] * std::pow(two, static_cast<double>(integer_bits));
+      const float float_output = std::tanh(float_input);
+      const int32_t quant_output = static_cast<int32_t>(
+          float_output * static_cast<float>(std::pow(2, 15)));
+      const int32_t quant_output_clamped =
+          std::min(int16_max, std::max(int16_min, quant_output));
+      output[index] = static_cast<int16_t>(quant_output_clamped);
+    }
+  }
+}
+
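+// Element-wise multiply of two int16 tensors. The first overload keeps an
+// int16 result using a rounding right shift of the int32 product; the second
+// requantizes the product with (multiplier, shift), subtracts output_zp and
+// saturates to int8.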
+void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
+                      int n_batch, int n_input, int shift, int16_t* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const int16_t a = input_1[index];
+      const int16_t b = input_2[index];
+      const int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
+      output[index] =
+          static_cast<int16_t>(gemmlowp::RoundingDivideByPOT(value, shift));
+    }
+  }
+}
+
+void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
+                      int32_t multiplier, int32_t shift, int32_t n_batch,
+                      int32_t n_input, int32_t output_zp, int8_t* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const int16_t a = input_1[index];
+      const int16_t b = input_2[index];
+      int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
+      value = MultiplyByQuantizedMultiplier(value, multiplier, shift);
+      value -= output_zp;
+      value = std::min(std::max(static_cast<int32_t>(-128), value),
+                       static_cast<int32_t>(127));
+
+      output[index] = static_cast<int8_t>(value);
+    }
+  }
+}
+
+void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
+                      int n_batch, int n_input, int16_t* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      int32_t sum = input_1[index] + input_2[index];
+      const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum));
+      output[index] = static_cast<int16_t>(sum_clamped);
+    }
+  }
+}
+
+float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
+                                     int v_size) {
+  float result = 0.0;
+  for (int v = 0; v < v_size; v++) {
+    result += *vector1++ * *vector2++;
+  }
+  return result;
+}
+
+namespace {
+inline int32_t VectorVectorDotProduct(const int16_t* vector1,
+                                      const int16_t* vector2, int v_size) {
+  int32_t result = 0;
+  for (int v = 0; v < v_size; v++) {
+    result += *vector1++ * *vector2++;
+  }
+  return result;
+}
+}  // namespace
+
+void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
+                                              const int16_t* vector2,
+                                              int v_size, int n_batch,
+                                              int32_t* result) {
+  for (int b = 0; b < n_batch; b++) {
+    result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
+    vector1 += v_size;
+    vector2 += v_size;
+  }
+}
+
+void PortableVectorBatchVectorCwiseProductAccumulate(
+    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
+    int32_t multiplier, int shift, int16_t* result) {
+  for (int b = 0; b < n_batch; b++) {
+    for (int v = 0; v < v_size; v++) {
+      int32_t prod = vector[v] * *batch_vector++;
+      prod = MultiplyByQuantizedMultiplier(prod, multiplier, shift);
+      int32_t output = prod + *result;
+      output = std::max(std::min(static_cast<int32_t>(32767), output),
+                        static_cast<int32_t>(-32768));
+      *result++ = output;
+    }
+  }
+}
+
+void PortableSub1Vector(const float* vector, int v_size, float* result) {
+  for (int v = 0; v < v_size; v++) {
+    *result++ = 1.0f - *vector++;
+  }
+}
+
+void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result) {
+  static const int16_t kOne = 32767;
+  for (int v = 0; v < v_size; v++) {
+    *result++ = kOne - *vector++;
+  }
+}
+
+void PortableVectorScalarMultiply(const int8_t* vector, const int v_size,
+                                  const float scale, float* result) {
+  for (int v = 0; v < v_size; ++v) {
+    *result++ = scale * *vector++;
+  }
+}
+
+void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
+                                     float* __restrict__ output_vector,
+                                     int v_size, int n_batch) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    float sum = 0.0f;
+    for (int i = 0; i < v_size; ++i) {
+      sum += input_vector[i];
+    }
+    const float mean = sum / v_size;
+    float sum_diff_sq = 0.0f;
+    for (int i = 0; i < v_size; ++i) {
+      const float diff = input_vector[i] - mean;
+      sum_diff_sq += diff * diff;
+    }
+    const float variance = sum_diff_sq / v_size;
+    constexpr float kNormalizationConstant = 1e-8f;
+    const float stddev_inv =
+        1.0f / std::sqrt(variance + kNormalizationConstant);
+    for (int i = 0; i < v_size; ++i) {
+      output_vector[i] = (input_vector[i] - mean) * stddev_inv;
+    }
+    input_vector += v_size;
+    output_vector += v_size;
+  }
+}
+
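+// Adds the input and recurrent contributions of a gate: both int8 operands
+// have their zero points removed, are rescaled with their effective (a, b)
+// multipliers, summed, and saturated to int16.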
+void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
+                                  const int8_t* recurrent, int8_t recurrent_zp,
+                                  int32_t input_effective_scale_a,
+                                  int32_t input_effective_scale_b,
+                                  int32_t recurrent_effective_scale_a,
+                                  int32_t recurrent_effective_scale_b,
+                                  int32_t n_batch, int32_t n_cell,
+                                  int16_t* output) {
+  const int32_t int16_max = std::numeric_limits<int16_t>::max();
+  const int32_t int16_min = std::numeric_limits<int16_t>::min();
+  for (int i = 0; i < n_batch * n_cell; ++i) {
+    int32_t x = static_cast<int32_t>(input[i]) - static_cast<int32_t>(input_zp);
+    int32_t h =
+        static_cast<int32_t>(recurrent[i]) - static_cast<int32_t>(recurrent_zp);
+    int32_t x_scaled = MultiplyByQuantizedMultiplier(x, input_effective_scale_a,
+                                                     input_effective_scale_b);
+    int32_t h_scaled = MultiplyByQuantizedMultiplier(
+        h, recurrent_effective_scale_a, recurrent_effective_scale_b);
+    int32_t y = h_scaled + x_scaled;
+    if (y > int16_max) {
+      y = int16_max;
+    }
+    if (y < int16_min) {
+      y = int16_min;
+    }
+    output[i] = static_cast<int16_t>(y);
+  }
+}
+
+}  // namespace tensor_utils
+}  // namespace tflite