@@ -1,1180 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
-
-#include <algorithm>
-#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
-#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
-#endif
-#endif
-
-#include <functional>
-
-#include "fixedpoint/fixedpoint.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
-#include "tensorflow/lite/kernels/internal/types.h"
-
-namespace tflite {
-
-constexpr int kReverseShift = -1;
-
-inline void GetActivationMinMax(FusedActivationFunctionType ac,
-                                float* output_activation_min,
-                                float* output_activation_max) {
-  switch (ac) {
-    case FusedActivationFunctionType::kNone:
-      *output_activation_min = std::numeric_limits<float>::lowest();
-      *output_activation_max = std::numeric_limits<float>::max();
-      break;
-    case FusedActivationFunctionType::kRelu:
-      *output_activation_min = 0.f;
-      *output_activation_max = std::numeric_limits<float>::max();
-      break;
-    case FusedActivationFunctionType::kRelu1:
-      *output_activation_min = -1.f;
-      *output_activation_max = 1.f;
-      break;
-    case FusedActivationFunctionType::kRelu6:
-      *output_activation_min = 0.f;
-      *output_activation_max = 6.f;
-      break;
-  }
-}
-
-template <typename T>
-inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
-                                      T output_activation_max) {
-  using std::max;
-  using std::min;
-  return min(max(x, output_activation_min), output_activation_max);
-}
-
-// Legacy function, left for compatibility only.
-template <FusedActivationFunctionType Ac>
-float ActivationFunction(float x) {
-  float output_activation_min, output_activation_max;
-  GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
-  return ActivationFunctionWithMinMax(x, output_activation_min,
-                                      output_activation_max);
-}
-
-inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
-                         const float* bias_data, int array_size,
-                         float* array_data) {
-  if (bias_size == 0) return;
-  // Note: see b/132215220: in May 2019 we thought it would be OK to replace
-  // this with the Eigen one-liner:
-  //   return (array.colwise() + bias).cwiseMin(clamp_max).cwiseMin(clamp_max).
-  // This turned out to severely regress performance: +4ms (i.e. 8%) on
-  // MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
-  TFLITE_DCHECK_EQ((array_size % bias_size), 0);
-#ifdef USE_NEON
-  float* array_ptr = array_data;
-  float* array_end_ptr = array_ptr + array_size;
-  const auto clamp_min_vec = vdupq_n_f32(clamp_min);
-  const auto clamp_max_vec = vdupq_n_f32(clamp_max);
-  for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
-    int i = 0;
-    for (; i <= bias_size - 16; i += 16) {
-      auto b0 = vld1q_f32(bias_data + i);
-      auto b1 = vld1q_f32(bias_data + i + 4);
-      auto b2 = vld1q_f32(bias_data + i + 8);
-      auto b3 = vld1q_f32(bias_data + i + 12);
-      auto a0 = vld1q_f32(array_ptr + i);
-      auto a1 = vld1q_f32(array_ptr + i + 4);
-      auto a2 = vld1q_f32(array_ptr + i + 8);
-      auto a3 = vld1q_f32(array_ptr + i + 12);
-      auto x0 = vaddq_f32(a0, b0);
-      auto x1 = vaddq_f32(a1, b1);
-      auto x2 = vaddq_f32(a2, b2);
-      auto x3 = vaddq_f32(a3, b3);
-      x0 = vmaxq_f32(clamp_min_vec, x0);
-      x1 = vmaxq_f32(clamp_min_vec, x1);
-      x2 = vmaxq_f32(clamp_min_vec, x2);
-      x3 = vmaxq_f32(clamp_min_vec, x3);
-      x0 = vminq_f32(clamp_max_vec, x0);
-      x1 = vminq_f32(clamp_max_vec, x1);
-      x2 = vminq_f32(clamp_max_vec, x2);
-      x3 = vminq_f32(clamp_max_vec, x3);
-      vst1q_f32(array_ptr + i, x0);
-      vst1q_f32(array_ptr + i + 4, x1);
-      vst1q_f32(array_ptr + i + 8, x2);
-      vst1q_f32(array_ptr + i + 12, x3);
-    }
-    for (; i <= bias_size - 4; i += 4) {
-      auto b = vld1q_f32(bias_data + i);
-      auto a = vld1q_f32(array_ptr + i);
-      auto x = vaddq_f32(a, b);
-      x = vmaxq_f32(clamp_min_vec, x);
-      x = vminq_f32(clamp_max_vec, x);
-      vst1q_f32(array_ptr + i, x);
-    }
-    for (; i < bias_size; i++) {
-      array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i],
-                                                  clamp_min, clamp_max);
-    }
-  }
-#else  // not NEON
-  for (int array_offset = 0; array_offset < array_size;
-       array_offset += bias_size) {
-    for (int i = 0; i < bias_size; i++) {
-      array_data[array_offset + i] = ActivationFunctionWithMinMax(
-          array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
-    }
-  }
-#endif
-}
-
-// Single-rounding MultiplyByQuantizedMultiplier
-#if TFLITE_SINGLE_ROUNDING
-inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-  TFLITE_DCHECK(shift >= -31 && shift <= 30);
-
-  const int64_t total_shift = 31 - shift;
-  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
-  int64_t result = x * static_cast<int64_t>(quantized_multiplier) + round;
-  result = result >> total_shift;
-
-  TFLITE_DCHECK(result >= std::numeric_limits<int32_t>::min() &&
-                result <= std::numeric_limits<int32_t>::max());
-  return static_cast<int32_t>(result);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK_LE(shift, 0);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
-    int32_t x, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK_GE(shift, 0);
-  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  // Inputs:
-  // - quantized_multiplier has fixed point at bit 31
-  // - shift is -31 to +7 (negative for right shift)
-  //
-  // Assumptions: The following input ranges are assumed
-  // - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1)
-  // - scaling is chosen so final scaled result fits in int32_t
-  // - input x is in the range -(1<<47) <= x < (1<<47)
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-  TFLITE_DCHECK(shift >= -31 && shift < 8);
-  TFLITE_DCHECK(x >= -(static_cast<int64_t>(1) << 47) &&
-                x < (static_cast<int64_t>(1) << 47));
-
-  const int32_t reduced_multiplier =
-      (quantized_multiplier < 0x7FFF0000)
-          ? ((quantized_multiplier + (1 << 15)) >> 16)
-          : 0x7FFF;
-  const int64_t total_shift = 15 - shift;
-  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
-  int64_t result = x * static_cast<int64_t>(reduced_multiplier) + round;
-  result = result >> total_shift;
-
-  TFLITE_DCHECK(result >= std::numeric_limits<int32_t>::min() &&
-                result <= std::numeric_limits<int32_t>::max());
-  return static_cast<int32_t>(result);
-}
-
-#ifdef USE_NEON
-inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
-    int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
-  TFLITE_DCHECK(quantized_multiplier >= 0);
-
-  const int right_shift = std::min(-1, shift);
-  const int left_shift = shift - right_shift;
-
-  const int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
-  const int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
-  const int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
-
-  int32x4x4_t result;
-  result.val[0] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[1] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[2] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  result.val[3] = vrshlq_s32(
-      vqdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup),
-      right_shift_dup);
-
-  return result;
-}
-#endif  // USE_NEON
-// Double-rounding MultiplyByQuantizedMultiplier
-#else
-inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
-    int32_t x, int32_t quantized_multiplier, int left_shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  return RoundingDivideByPOT(
-      SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
-    int32_t x, int32_t quantized_multiplier, int left_shift) {
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
-                                           quantized_multiplier);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  int left_shift = shift > 0 ? shift : 0;
-  int right_shift = shift > 0 ? 0 : -shift;
-  return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
-                                 x * (1 << left_shift), quantized_multiplier),
-                             right_shift);
-}
-
-inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  // Inputs:
-  // - quantized_multiplier has fixed point at bit 31
-  // - shift is -31 to +7 (negative for right shift)
-  //
-  // Assumptions: The following input ranges are assumed
-  // - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1)
-  // - scaling is chosen so final scaled result fits in int32_t
-  // - input x is in the range -(1<<47) <= x < (1<<47)
-  assert(quantized_multiplier >= 0);
-  assert(shift >= -31 && shift < 8);
-  assert(x >= -(static_cast<int64_t>(1) << 47) &&
-         x < (static_cast<int64_t>(1) << 47));
-
-  int32_t reduced_multiplier = (quantized_multiplier < 0x7FFF0000)
-                                   ? ((quantized_multiplier + (1 << 15)) >> 16)
-                                   : 0x7FFF;
-  int total_shift = 15 - shift;
-  x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
-  int32_t result = x >> total_shift;
-  return result;
-}
-
-#ifdef USE_NEON
-// Round uses ARM's rounding shift right.
-inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
-    int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
-  const int left_shift = std::max(shift, 0);
-  const int right_shift = std::min(shift, 0);
-  int32x4x4_t result;
-
-  int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
-  int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
-  int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
-
-  result.val[0] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[1] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[2] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  result.val[3] =
-      vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup),
-                               multiplier_dup),
-                 right_shift_dup);
-
-  return result;
-}
-#endif  // USE_NEON
-#endif  // TFLITE_SINGLE_ROUNDING
-
-template <typename T>
-int CountLeadingZeros(T integer_input) {
-  static_assert(std::is_unsigned<T>::value,
-                "Only unsigned integer types handled.");
-#if defined(__GNUC__)
-  return integer_input ? __builtin_clz(integer_input)
-                       : std::numeric_limits<T>::digits;
-#else
-  if (integer_input == 0) {
-    return std::numeric_limits<T>::digits;
-  }
-
-  const T one_in_leading_positive = static_cast<T>(1)
-                                    << (std::numeric_limits<T>::digits - 1);
-  int leading_zeros = 0;
-  while (integer_input < one_in_leading_positive) {
-    integer_input <<= 1;
-    ++leading_zeros;
-  }
-  return leading_zeros;
-#endif
-}
-
-template <typename T>
-inline int CountLeadingSignBits(T integer_input) {
-  static_assert(std::is_signed<T>::value, "Only signed integer types handled.");
-#if defined(__GNUC__) && !defined(__clang__)
-  return integer_input ? __builtin_clrsb(integer_input)
-                       : std::numeric_limits<T>::digits;
-#else
-  using U = typename std::make_unsigned<T>::type;
-  return integer_input >= 0
-             ? CountLeadingZeros(static_cast<U>(integer_input)) - 1
-         : integer_input != std::numeric_limits<T>::min()
-             ? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
-             : 0;
-#endif
-}
-
-// Use "count leading zeros" helper functions to do a fast Floor(log_2(x)).
-template <typename Integer>
-inline Integer FloorLog2(Integer n) {
-  static_assert(std::is_integral<Integer>::value, "");
-  static_assert(std::is_signed<Integer>::value, "");
-  static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, "");
-  TFLITE_CHECK_GT(n, 0);
-  if (sizeof(Integer) == 4) {
-    return 30 - CountLeadingSignBits(n);
-  } else {
-    return 62 - CountLeadingSignBits(n);
-  }
-}
-
-// The size of the LUT depends on the type of input. For int8 inputs a simple
-// 256 entries LUT is used. For int16 inputs the high 9 bits are used for
-// indexing and the 7 remaining bits are used for interpolation. We thus use a
-// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry
-// to interpolate the last value.
-template <typename LutInT>
-constexpr int lut_size() {
-  static_assert(std::is_same<LutInT, int8_t>::value ||
-                    std::is_same<LutInT, int16_t>::value,
-                "Only LUTs with int8 or int16 inputs are supported.");
-  return std::is_same<LutInT, int8_t>::value ? 256 : 513;
-}
-
-// Generate a LUT for 'func' which can be used to approximate functions like
-// exp, log, ...
-//
-// - func: the function to build the LUT for (e.g exp(x))
-// - input_min, input_max: range of the func inputs
-// - output_min, output_max: range of the func outputs
-// - lut: pointer to the LUT table to fill, the table must be of size
-// lut_size<LutInT>()
-template <typename FloatT, typename LutInT, typename LutOutT>
-inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max,
-                    FloatT output_min, FloatT output_max, LutOutT* lut) {
-  static_assert(std::is_same<LutInT, int8_t>::value ||
-                    std::is_same<LutInT, int16_t>::value,
-                "Only LUTs with int8 or int16 inputs are supported.");
-  static_assert(std::is_same<LutOutT, int8_t>::value ||
-                    std::is_same<LutOutT, int16_t>::value,
-                "Only LUTs with int8 or int16 outputs are supported.");
-  static_assert(std::is_floating_point<FloatT>::value,
-                "FloatT must be a floating-point type.");
-
-  const int nb_steps = std::is_same<LutInT, int8_t>::value ? 256 : 512;
-  const FloatT step = (input_max - input_min) / nb_steps;
-  const FloatT half_step = step / 2;
-  const FloatT output_scaling_inv =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::max() -
-                          std::numeric_limits<LutOutT>::min() + 1) /
-      (output_max - output_min);
-  const FloatT table_min =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::min());
-  const FloatT table_max =
-      static_cast<FloatT>(std::numeric_limits<LutOutT>::max());
-
-  for (int i = 0; i < nb_steps; i++) {
-    const FloatT val = func(input_min + i * step);
-    const FloatT val_midpoint = func(input_min + i * step + half_step);
-    const FloatT val_next = func(input_min + (i + 1) * step);
-
-    const FloatT sample_val = TfLiteRound(val * output_scaling_inv);
-    const FloatT midpoint_interp_val =
-        TfLiteRound((val_next * output_scaling_inv +
-                     TfLiteRound(val * output_scaling_inv)) /
-                    2);
-    const FloatT midpoint_val = TfLiteRound(val_midpoint * output_scaling_inv);
-    const FloatT midpoint_err = midpoint_interp_val - midpoint_val;
-    const FloatT bias = TfLiteRound(midpoint_err / 2);
-
-    lut[i] = static_cast<LutOutT>(std::min<FloatT>(
-        std::max<FloatT>(sample_val - bias, table_min), table_max));
-  }
-
-  const bool with_extra_interpolation_value =
-      std::is_same<LutInT, int16_t>::value;
-  if (with_extra_interpolation_value) {
-    lut[nb_steps] = static_cast<LutOutT>(std::min<FloatT>(
-        std::max<FloatT>(TfLiteRound(func(input_max) * output_scaling_inv),
-                         table_min),
-        table_max));
-  }
-}
-
-// LUT must have 513 values
-template <typename LutOutT>
-inline LutOutT lut_lookup_with_interpolation(int16_t value,
-                                             const LutOutT* lut) {
-  static_assert(std::is_same<LutOutT, int8_t>::value ||
-                    std::is_same<LutOutT, int16_t>::value,
-                "Only LUTs with int8 or int16 outputs are supported.");
-  // 512 base values, lut[513] is only used to calculate the slope
-  const uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
-  assert(index < 512 && "LUT index out of range.");
-  const int16_t offset = value & 0x7f;
-
-  // Base and slope are Q0.x
-  const LutOutT base = lut[index];
-  const LutOutT slope = lut[index + 1] - lut[index];
-
-  // Q0.x * Q0.7 = Q0.(x + 7)
-  // Round and convert from Q0.(x + 7) to Q0.x
-  const int delta = (slope * offset + 64) >> 7;
-
-  // Q0.15 + Q0.15
-  return static_cast<LutOutT>(base + delta);
-}
-
-// int16_t -> int16_t table lookup with interpolation
-// LUT must have 513 values
-inline int16_t lut_lookup(int16_t value, const int16_t* lut) {
-  return lut_lookup_with_interpolation(value, lut);
-}
-
-// int16_t -> int8_t table lookup with interpolation
-// LUT must have 513 values
-inline int8_t lut_lookup(int16_t value, const int8_t* lut) {
-  return lut_lookup_with_interpolation(value, lut);
-}
-
-// int8_t -> int8_t table lookup without interpolation
-// LUT must have 256 values
-inline int8_t lut_lookup(int8_t value, const int8_t* lut) {
-  return lut[128 + value];
-}
-
-// int8_t -> int16_t table lookup without interpolation
-// LUT must have 256 values
-inline int16_t lut_lookup(int8_t value, const int16_t* lut) {
-  return lut[128 + value];
-}
-
-// Table of sigmoid(i/24) at 0.16 format - 256 elements.
-
-// We use combined sigmoid and tanh look-up table, since
-// tanh(x) = 2*sigmoid(2*x) -1.
-// Both functions are symmetric, so the LUT table is only needed
-// for the absolute value of the input.
-static const uint16_t sigmoid_table_uint16[256] = {
-    32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498,
-    40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255,
-    46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865,
-    52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174,
-    56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288,
-    59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441,
-    61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886,
-    62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835,
-    63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450,
-    64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845,
-    64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097,
-    65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258,
-    65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360,
-    65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
-    65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465,
-    65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491,
-    65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508,
-    65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518,
-    65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525,
-    65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529,
-    65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531,
-    65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533,
-    65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
-    65534, 65534, 65535};
-
-// TODO(b/77858996): Add these to gemmlowp.
-template <typename IntegerType>
-IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-template <>
-inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
-  std::int64_t a64 = a;
-  std::int64_t b64 = b;
-  std::int64_t sum = a64 + b64;
-  return static_cast<std::int32_t>(std::min(
-      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
-      std::max(
-          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
-          sum)));
-}
-
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingAddNonGemmlowp(a.raw(), b.raw()));
-}
-
-template <typename IntegerType>
-IntegerType SaturatingSub(IntegerType a, IntegerType b) {
-  static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
-  return a;
-}
-
-template <>
-inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
-  std::int32_t a32 = a;
-  std::int32_t b32 = b;
-  std::int32_t diff = a32 - b32;
-  return static_cast<std::int16_t>(
-      std::min(static_cast<int32_t>(32767),
-               std::max(static_cast<int32_t>(-32768), diff)));
-}
-
-template <>
-inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
-  std::int64_t a64 = a;
-  std::int64_t b64 = b;
-  std::int64_t diff = a64 - b64;
-  return static_cast<std::int32_t>(std::min(
-      static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
-      std::max(
-          static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
-          diff)));
-}
-
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingSub(a.raw(), b.raw()));
-}
-// End section to be moved to gemmlowp.
-
-template <typename IntegerType>
-IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
-  if (exponent == 0) {
-    return x;
-  }
-  using ScalarIntegerType =
-      typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
-  const IntegerType min =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
-  const IntegerType max =
-      gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
-  const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
-
-  const std::int32_t threshold =
-      ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
-  const IntegerType positive_mask =
-      gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
-  const IntegerType negative_mask =
-      gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
-
-  IntegerType result = gemmlowp::ShiftLeft(x, exponent);
-  result = gemmlowp::SelectUsingMask(positive_mask, max, result);
-  result = gemmlowp::SelectUsingMask(negative_mask, min, result);
-  return result;
-}
-
-// If we want to leave IntegerBits fixed, then multiplication
-// by a power of two has to be saturating/rounding, not exact anymore.
-template <typename tRawType, int tIntegerBits>
-gemmlowp::FixedPoint<tRawType, tIntegerBits>
-SaturatingRoundingMultiplyByPOTParam(
-    gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
-  return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
-      SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
-}
-
-// Convert int32_t multiplier to int16_t with rounding.
-inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
-                                            int16_t* multiplier_int16_t) {
-  TFLITE_DCHECK_GE(multiplier_int32_t, 0);
-  static constexpr int32_t kRoundingOffset = 1 << 15;
-  if (multiplier_int32_t >=
-      std::numeric_limits<int32_t>::max() - kRoundingOffset) {
-    *multiplier_int16_t = std::numeric_limits<int16_t>::max();
-    return;
-  }
-  const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
-  TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
-  TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
-  *multiplier_int16_t = result;
-  TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
-}
-
-// Minimum output bits to accommodate log of maximum input range. It actually
-// does not matter if one considers, say, [-64,64] or [-64,64).
-//
-// For example, run this through Octave:
-// [0:127; ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
-//  ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
-constexpr int min_log_x_output_bits(int input_bits) {
-  return input_bits > 90 ? 7
-         : input_bits > 44 ? 6
-         : input_bits > 21 ? 5
-         : input_bits > 10 ? 4
-         : input_bits > 4 ? 3
-         : input_bits > 1 ? 2
-                          : 1;
-}
-
-// Although currently the name of this function says that it cannot handle
-// values less than 1, in practice it can handle as low as 1/x_max, where
-// x_max is the largest representable input. In other words, the output range
-// is symmetric.
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1_impl(
-    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
-  // assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
-  // assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
-  using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
-  // The reason for accumulating the result with an extra bit of headroom is
-  // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
-  // recip_denom will otherwise introduce an error.
-  static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
-  using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
-
-  const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1488522236, std::log(2.0));
-  const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
-  const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1518500250, std::sqrt(0.5));
-  const FixedPoint0 one_quarter =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
-
-  const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 1057819769,
-      2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
-  const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
-      FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
-
-  const FixedPointAccum shifted_quarter =
-      gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
-
-  // Reinterpret the input value as Q0.31, because we will figure out the
-  // required shift "ourselves" instead of using, say, Rescale.
-  FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
-  // z_a_pow_2 = input_integer_bits - z_a_headroom;
-  int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
-  FixedPoint0 r_a_tmp =
-      SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
-  const int32_t r_a_raw =
-      SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
-  // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
-  // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
-  //                   InputIntegerBits - z_b_headroom - 0.25);
-  const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          static_cast<int32_t>(InputIntegerBits - z_a_headroom_plus_1),
-          31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  // z_b is treated like z_a, but premultiplying by sqrt(0.5).
-  FixedPoint0 z_b = z_a * sqrt_half;
-  int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
-  const int32_t r_b_raw =
-      SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
-  const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
-      FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
-          static_cast<int32_t>(InputIntegerBits - z_b_headroom),
-          31 - kAccumIntegerBits)),
-      shifted_quarter);
-
-  const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
-  const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
-      std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
-
-  const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
-  FixedPoint0 q = r - sqrt_sqrt_half;
-  q = q + q;
-
-  const FixedPoint0 common_sq = q * q;
-  const FixedPoint0 num = q * r + q * common_sq * alpha_n;
-  const FixedPoint0 denom_minus_one_0 =
-      p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
-  const FixedPoint0 recip_denom =
-      one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
-
-  const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
-  return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
-                                              num_scaled * recip_denom);
-}
-
-template <int OutputIntegerBits, int InputIntegerBits>
-inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
-log_x_for_x_greater_than_or_equal_to_1(
-    gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
-  static_assert(
-      OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
-      "Output integer bits must be sufficient to accommodate logs of inputs.");
-  return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
-                                                     InputIntegerBits>(
-      input_val);
-}
-
-inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
-                             int* num_bits_over_unit) {
-  int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
-  // This is the number of bits to the left of the binary point above 1.0.
-  // Consider x=1.25. In that case shifted_scale=0.8 and
-  // no later adjustment will be needed.
-  *num_bits_over_unit = x_integer_digits - headroom_plus_one;
-  const int32_t shifted_sum_minus_one =
-      static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
-                           (static_cast<uint32_t>(1) << 31));
-
-  gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
-      gemmlowp::one_over_one_plus_x_for_x_in_0_1(
-          gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
-  return shifted_scale.raw();
-}
-
-inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
-                                             int32_t* output_inv_sqrt,
-                                             int* output_shift) {
-  TFLITE_DCHECK_GE(input, 0);
-  if (input <= 1) {
-    // Handle the input value 1 separately to avoid overflow in that case
-    // in the general computation below (b/143972021). Also handle 0 as if it
-    // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
-    // but rare/unrealistic input value. We can expect both to occur in some
-    // incompletely trained models, but probably not in fully trained models.
-    *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
-    *output_shift = 0;
-    return;
-  }
-  TFLITE_DCHECK_GT(input, 1);
-  *output_shift = 11;
-  while (input >= (1 << 29)) {
-    input /= 4;
-    ++*output_shift;
-  }
-  const unsigned max_left_shift_bits =
-      CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
-  const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
-  const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
-  *output_shift -= left_shift_bit_pairs;
-  input <<= 2 * left_shift_bit_pairs;
-  TFLITE_DCHECK_GE(input, (1 << 27));
-  TFLITE_DCHECK_LT(input, (1 << 29));
-  using gemmlowp::FixedPoint;
-  using gemmlowp::Rescale;
-  using gemmlowp::SaturatingRoundingMultiplyByPOT;
-  // Using 3 integer bits gives us enough room for the internal arithmetic in
-  // this Newton-Raphson iteration.
-  using F3 = FixedPoint<int32_t, 3>;
-  using F0 = FixedPoint<int32_t, 0>;
-  const F3 fixedpoint_input = F3::FromRaw(input >> 1);
-  const F3 fixedpoint_half_input =
-      SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
-  const F3 fixedpoint_half_three =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
-  // Newton-Raphson iteration
-  // Naive unoptimized starting guess: x = 1
-  F3 x = F3::One();
-  // Naive unoptimized number of iterations: 5
-  for (int i = 0; i < 5; i++) {
-    const F3 x3 = Rescale<3>(x * x * x);
-    x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
-  }
-  const F0 fixedpoint_half_sqrt_2 =
-      GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
-  x = x * fixedpoint_half_sqrt_2;
-  *output_inv_sqrt = x.raw();
-  if (*output_shift < 0) {
-    *output_inv_sqrt <<= -*output_shift;
-    *output_shift = 0;
-  }
-  // Convert right shift (right is positive) to left shift.
-  *output_shift *= reverse_shift;
-}
-
-// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
-// BROADCASTING.
-//
-// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
-// rectangular array of numbers.
-//
-// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
-// However, as Dims<N> is to be deprecated, this class exists as an adaptor
-// to enable simple unoptimized implementations of element-wise broadcasting
-// operations.
-template <int N>
-struct NdArrayDesc {
-  // The "extent" of each dimension. Indices along dimension d must be in the
-  // half-open interval [0, extents[d]).
-  int extents[N];
-
-  // The number of *elements* (not bytes) between consecutive indices of each
-  // dimension.
-  int strides[N];
-};
-
-// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
-// BROADCASTING.
-//
-// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
-inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
-                            int i3) {
-  TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
-  TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
-  TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
-  TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
-  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
-         i3 * desc.strides[3];
-}
-
-inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) {
-  return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
-         indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
-         indexes[4] * desc.strides[4];
-}
-
-inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) {
-  return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
-         indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
-         indexes[4] * desc.strides[4] + indexes[5] * desc.strides[5] +
-         indexes[6] * desc.strides[6] + indexes[7] * desc.strides[7];
-}
-
-// Given the dimensions of the operands for an element-wise binary broadcast,
-// adjusts them so that they can be directly iterated over with simple loops.
-// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and
-// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr.
-//
-// This function assumes that the two input shapes are compatible up to
-// broadcasting and the shorter one has already been prepended with 1s to be the
-// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64),
-// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that
-// Dims<N> refer to shapes in reverse order. In this case, input0_dims will be
-// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1).
-//
-// When two shapes are compatible up to broadcasting, for each dimension d,
-// the input extents are either equal, or one of them is 1.
-//
-// This function performs the following for each dimension d:
-// - If the extents are equal, then do nothing since the loop that walks over
-//   both of the input arrays is correct.
-// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1
-//   and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows
-//   array0 to be referenced *at any index* in dimension d and still access the
-//   same slice.
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
-                                                const Dims<N>& input1_dims,
-                                                NdArrayDesc<N>* desc0_out,
-                                                NdArrayDesc<N>* desc1_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-
-  // Copy dims to desc.
-  for (int i = 0; i < N; ++i) {
-    desc0_out->extents[i] = input0_dims.sizes[i];
-    desc0_out->strides[i] = input0_dims.strides[i];
-    desc1_out->extents[i] = input1_dims.sizes[i];
-    desc1_out->strides[i] = input1_dims.strides[i];
-  }
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = ArraySize(input0_dims, i);
-    const int extent1 = ArraySize(input1_dims, i);
-    if (extent0 != extent1) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent1;
-      } else {
-        TFLITE_DCHECK_EQ(extent1, 1);
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent0;
-      }
-    }
-  }
-}
-
-// Copies dims to desc, calculating strides.
-template <int N>
-inline void CopyDimsToDesc(const RuntimeShape& input_shape,
-                           NdArrayDesc<N>* desc_out) {
-  int desc_stride = 1;
-  for (int i = N - 1; i >= 0; --i) {
-    desc_out->extents[i] = input_shape.Dims(i);
-    desc_out->strides[i] = desc_stride;
-    desc_stride *= input_shape.Dims(i);
-  }
-}
-
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(
-    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
-    NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-
-  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
-  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
-
-  // Copy dims to desc, calculating strides.
-  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
-  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = extended_input0_shape.Dims(i);
-    const int extent1 = extended_input1_shape.Dims(i);
-    if (extent0 != extent1) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent1;
-      } else {
-        TFLITE_DCHECK_EQ(extent1, 1);
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent0;
-      }
-    }
-  }
-}
-
-template <int N>
-inline void NdArrayDescsForElementwiseBroadcast(
-    const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
-    const RuntimeShape& input2_shape, NdArrayDesc<N>* desc0_out,
-    NdArrayDesc<N>* desc1_out, NdArrayDesc<N>* desc2_out) {
-  TFLITE_DCHECK(desc0_out != nullptr);
-  TFLITE_DCHECK(desc1_out != nullptr);
-  TFLITE_DCHECK(desc2_out != nullptr);
-
-  auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
-  auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
-  auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape);
-
-  // Copy dims to desc, calculating strides.
-  CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
-  CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
-  CopyDimsToDesc<N>(extended_input2_shape, desc2_out);
-
-  // Walk over each dimension. If the extents are equal do nothing.
-  // Otherwise, set the desc with extent 1 to have extent equal to the other and
-  // stride 0.
-  for (int i = 0; i < N; ++i) {
-    const int extent0 = extended_input0_shape.Dims(i);
-    const int extent1 = extended_input1_shape.Dims(i);
-    const int extent2 = extended_input2_shape.Dims(i);
-
-    int extent = extent0;
-    if (extent1 != 1) extent = extent1;
-    if (extent2 != 1) extent = extent2;
-
-    TFLITE_DCHECK(extent0 == 1 || extent0 == extent);
-    TFLITE_DCHECK(extent1 == 1 || extent1 == extent);
-    TFLITE_DCHECK(extent2 == 1 || extent2 == extent);
-
-    if (!(extent0 == extent1 && extent1 == extent2)) {
-      if (extent0 == 1) {
-        desc0_out->strides[i] = 0;
-        desc0_out->extents[i] = extent;
-      }
-      if (extent1 == 1) {
-        desc1_out->strides[i] = 0;
-        desc1_out->extents[i] = extent;
-      }
-      if (extent2 == 1) {
-        desc2_out->strides[i] = 0;
-        desc2_out->extents[i] = extent;
-      }
-    }
-  }
-}
-
-// Detailed implementation of NDOpsHelper, the indexes must be a zero array.
-// This implementation is equivalent to N nested loops. Ex, if N=4, it can be
-// re-writen as:
-// for (int b = 0; b < output.extents[0]; ++b) {
-//   for (int y = 0; y < output.extents[1]; ++y) {
-//     for (int x = 0; x < output.extents[2]; ++x) {
-//       for (int c = 0; c < output.extents[3]; ++c) {
-//           calc({b,y,x,c});
-//       }
-//     }
-//   }
-// }
-template <int N, int DIM, typename Calc>
-typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(
-    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
-  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
-    NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
-  }
-}
-
-template <int N, int DIM, typename Calc>
-typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(
-    const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
-  for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
-    calc(indexes);
-  }
-}
-
-// Execute the calc function in the innermost iteration based on the shape of
-// the output. The calc function should take a single argument of type int[N].
-template <int N, typename Calc>
-inline void NDOpsHelper(const NdArrayDesc<N>& output, const Calc& calc) {
-  int indexes[N] = {0};
-  NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
-}
-// Copied from gemmlowp::RoundDown when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the runtime argument rounded down to the nearest multiple of
-// the fixed Modulus.
-template <unsigned Modulus, typename Integer>
-Integer RoundDown(Integer i) {
-  return i - (i % Modulus);
-}
-
-// Copied from gemmlowp::RoundUp when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the runtime argument rounded up to the nearest multiple of
-// the fixed Modulus.
-template <unsigned Modulus, typename Integer>
-Integer RoundUp(Integer i) {
-  return RoundDown<Modulus>(i + Modulus - 1);
-}
-
-// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on
-// gemmlowp.
-//
-// Returns the quotient a / b rounded up ('ceil') to the nearest integer.
-template <typename Integer>
-Integer CeilQuotient(Integer a, Integer b) {
-  return (a + b - 1) / b;
-}
-
-// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped
-// the direct dependency of internal/optimized/ on gemmlowp.
-//
-// It computes a reasonable number of threads to use for a GEMM of shape
-// (rows, cols, depth).
-//
-// TODO(b/131910176): get rid of this function by switching each call site
-// to its own more sensible logic for its own workload.
-template <int KernelRows>
-inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols,
-                                int depth) {
-  // Early-exit in the default case where multi-threading is disabled.
-  if (max_num_threads == 1) {
-    return 1;
-  }
-
-  // Ensure that each thread has KernelRows rows to process, if at all possible.
-  int thread_count = std::min(max_num_threads, rows / KernelRows);
-
-  // Limit the number of threads according to the overall size of the problem.
-  if (thread_count > 1) {
-    // Empirically determined value.
-    static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024;
-
-    // We can only multiply two out of three sizes without risking overflow
-    const std::uint64_t cubic_size =
-        std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth);
-
-    thread_count = std::min(
-        thread_count, static_cast<int>(cubic_size / min_cubic_size_per_thread));
-  }
-
-  if (thread_count < 1) {
-    thread_count = 1;
-  }
-
-  assert(thread_count > 0 && thread_count <= max_num_threads);
-  return thread_count;
-}
-
-template <typename T>
-void optimized_ops_preload_l1_stream(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0);
-#else
-  (void)ptr;
-#endif
-}
-
-template <typename T>
-void optimized_ops_preload_l1_keep(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
-#else
-  (void)ptr;
-#endif
-}
-
-template <typename T>
-void optimized_ops_prefetch_write_l1_keep(const T* ptr) {
-#ifdef __GNUC__
-  // builtin offered by GCC-compatible compilers including clang
-  __builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3);
-#else
-  (void)ptr;
-#endif
-}
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
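
For reference, a minimal sketch of how the removed helpers are typically combined in a quantized kernel. This is illustrative only and not part of the change; it assumes the header is still available at its old include path, and the multiplier, shift, and zero-point constants below are made-up example values rather than parameters from any real model.

// Illustrative sketch: requantize an int32 accumulator to int8 and apply a
// fused activation clamp using MultiplyByQuantizedMultiplier() and
// ActivationFunctionWithMinMax() from this header.
#include <cstdint>

#include "tensorflow/lite/kernels/internal/common.h"

int8_t RequantizeAccumulator(int32_t acc) {
  // Hypothetical output multiplier/shift; in a real kernel these are derived
  // from the input, filter, and output scales during op preparation.
  const int32_t output_multiplier = 1354133333;  // example value only
  const int output_shift = -7;                   // example value only
  const int32_t output_offset = -128;            // example output zero point

  // Scale the accumulator back into the output's quantized domain.
  int32_t scaled = tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier,
                                                         output_shift);
  scaled += output_offset;
  // Clamp to the int8 range; the same call handles fused ReLU-style bounds.
  scaled = tflite::ActivationFunctionWithMinMax<int32_t>(scaled, -128, 127);
  return static_cast<int8_t>(scaled);
}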